MIPS32: Fill branch delay slots
Test: booted MIPS32 in QEMU
Test: test-art-host-gtest
Test: test-art-target-gtest
Test: test-art-target-run-test-optimizing on CI20
Change-Id: I727e80753395ab99fff004cb5d2e0a06409150d7
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 8981776..b616057 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -362,6 +362,16 @@
// Size of generated code
virtual size_t CodeSize() const { return buffer_.Size(); }
virtual const uint8_t* CodeBufferBaseAddress() const { return buffer_.contents(); }
+ // CodePosition() is a non-const counterpart of CodeSize(). The code generator
+ // calls it to record positions within the code buffer for the purpose of
+ // signal handling (stack overflow checks and implicit null checks may trigger
+ // signals, and the signal handlers expect the faulting instruction to be
+ // immediately before the recorded position).
+ // On most architectures CodePosition() should be equivalent to CodeSize(), but
+ // the MIPS assembler needs to be aware of this recording so that it doesn't
+ // put instructions that can trigger signals into branch delay slots. Handling
+ // signals from instructions in delay slots is problematic and should be avoided.
+ virtual size_t CodePosition() { return CodeSize(); }
// Copy instructions out of assembly buffer into the given region of memory
virtual void FinalizeInstructions(const MemoryRegion& region) {
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index bfc63d1..4b580b6 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -40,10 +40,195 @@
return os;
}
+MipsAssembler::DelaySlot::DelaySlot()
+ : instruction_(0),
+ gpr_outs_mask_(0),
+ gpr_ins_mask_(0),
+ fpr_outs_mask_(0),
+ fpr_ins_mask_(0),
+ cc_outs_mask_(0),
+ cc_ins_mask_(0) {}
+
+void MipsAssembler::DsFsmInstr(uint32_t instruction,
+ uint32_t gpr_outs_mask,
+ uint32_t gpr_ins_mask,
+ uint32_t fpr_outs_mask,
+ uint32_t fpr_ins_mask,
+ uint32_t cc_outs_mask,
+ uint32_t cc_ins_mask) {
+ if (!reordering_) {
+ CHECK_EQ(ds_fsm_state_, kExpectingLabel);
+ CHECK_EQ(delay_slot_.instruction_, 0u);
+ return;
+ }
+ switch (ds_fsm_state_) {
+ case kExpectingLabel:
+ break;
+ case kExpectingInstruction:
+ CHECK_EQ(ds_fsm_target_pc_ + sizeof(uint32_t), buffer_.Size());
+ // If the last instruction is not suitable for delay slots, drop
+ // the PC of the label preceding it so that no unconditional branch
+ // uses this instruction to fill its delay slot.
+ if (instruction == 0) {
+ DsFsmDropLabel(); // Sets ds_fsm_state_ = kExpectingLabel.
+ } else {
+ // Otherwise wait for another instruction or label before we can
+ // commit the label PC. The label PC will be dropped if instead
+ // of another instruction or label there's a call from the code
+ // generator to CodePosition() to record the buffer size.
+ // Instructions after which the buffer size is recorded cannot
+ // be moved into delay slots or anywhere else because they may
+ // trigger signals and the signal handlers expect these signals
+ // to be coming from the instructions immediately preceding the
+ // recorded buffer locations.
+ ds_fsm_state_ = kExpectingCommit;
+ }
+ break;
+ case kExpectingCommit:
+ CHECK_EQ(ds_fsm_target_pc_ + 2 * sizeof(uint32_t), buffer_.Size());
+ DsFsmCommitLabel(); // Sets ds_fsm_state_ = kExpectingLabel.
+ break;
+ }
+ delay_slot_.instruction_ = instruction;
+ delay_slot_.gpr_outs_mask_ = gpr_outs_mask & ~1u; // Ignore register ZERO.
+ delay_slot_.gpr_ins_mask_ = gpr_ins_mask & ~1u; // Ignore register ZERO.
+ delay_slot_.fpr_outs_mask_ = fpr_outs_mask;
+ delay_slot_.fpr_ins_mask_ = fpr_ins_mask;
+ delay_slot_.cc_outs_mask_ = cc_outs_mask;
+ delay_slot_.cc_ins_mask_ = cc_ins_mask;
+}
+
+void MipsAssembler::DsFsmLabel() {
+ if (!reordering_) {
+ CHECK_EQ(ds_fsm_state_, kExpectingLabel);
+ CHECK_EQ(delay_slot_.instruction_, 0u);
+ return;
+ }
+ switch (ds_fsm_state_) {
+ case kExpectingLabel:
+ ds_fsm_target_pc_ = buffer_.Size();
+ ds_fsm_state_ = kExpectingInstruction;
+ break;
+ case kExpectingInstruction:
+ // Allow consecutive labels.
+ CHECK_EQ(ds_fsm_target_pc_, buffer_.Size());
+ break;
+ case kExpectingCommit:
+ CHECK_EQ(ds_fsm_target_pc_ + sizeof(uint32_t), buffer_.Size());
+ DsFsmCommitLabel();
+ ds_fsm_target_pc_ = buffer_.Size();
+ ds_fsm_state_ = kExpectingInstruction;
+ break;
+ }
+ // We cannot move instructions into delay slots across labels.
+ delay_slot_.instruction_ = 0;
+}
+
+void MipsAssembler::DsFsmCommitLabel() {
+ if (ds_fsm_state_ == kExpectingCommit) {
+ ds_fsm_target_pcs_.emplace_back(ds_fsm_target_pc_);
+ }
+ ds_fsm_state_ = kExpectingLabel;
+}
+
+void MipsAssembler::DsFsmDropLabel() {
+ ds_fsm_state_ = kExpectingLabel;
+}
+
+bool MipsAssembler::SetReorder(bool enable) {
+ bool last_state = reordering_;
+ if (last_state != enable) {
+ DsFsmCommitLabel();
+ DsFsmInstrNop(0);
+ }
+ reordering_ = enable;
+ return last_state;
+}
+
+size_t MipsAssembler::CodePosition() {
+ // The last instruction may trigger a signal and so cannot be moved into a
+ // delay slot: do not commit the label before it (if any) and clear the delay slot.
+ DsFsmDropLabel();
+ DsFsmInstrNop(0);
+ size_t size = buffer_.Size();
+ // In theory we can get the following sequence:
+ // label1:
+ // instr
+ // label2: # label1 gets committed when label2 is seen
+ // CodePosition() call
+ // and we need to uncommit label1.
+ if (ds_fsm_target_pcs_.size() != 0 && ds_fsm_target_pcs_.back() + sizeof(uint32_t) == size) {
+ ds_fsm_target_pcs_.pop_back();
+ }
+ return size;
+}
+
+void MipsAssembler::DsFsmInstrNop(uint32_t instruction ATTRIBUTE_UNUSED) {
+ DsFsmInstr(0, 0, 0, 0, 0, 0, 0);
+}
+
+void MipsAssembler::DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2) {
+ DsFsmInstr(instruction, (1u << out), (1u << in1) | (1u << in2), 0, 0, 0, 0);
+}
+
+void MipsAssembler::DsFsmInstrRrrr(uint32_t instruction,
+ Register in1_out,
+ Register in2,
+ Register in3) {
+ DsFsmInstr(instruction, (1u << in1_out), (1u << in1_out) | (1u << in2) | (1u << in3), 0, 0, 0, 0);
+}
+
+void MipsAssembler::DsFsmInstrFff(uint32_t instruction,
+ FRegister out,
+ FRegister in1,
+ FRegister in2) {
+ DsFsmInstr(instruction, 0, 0, (1u << out), (1u << in1) | (1u << in2), 0, 0);
+}
+
+void MipsAssembler::DsFsmInstrFfff(uint32_t instruction,
+ FRegister in1_out,
+ FRegister in2,
+ FRegister in3) {
+ DsFsmInstr(instruction, 0, 0, (1u << in1_out), (1u << in1_out) | (1u << in2) | (1u << in3), 0, 0);
+}
+
+void MipsAssembler::DsFsmInstrRf(uint32_t instruction, Register out, FRegister in) {
+ DsFsmInstr(instruction, (1u << out), 0, 0, (1u << in), 0, 0);
+}
+
+void MipsAssembler::DsFsmInstrFr(uint32_t instruction, FRegister out, Register in) {
+ DsFsmInstr(instruction, 0, (1u << in), (1u << out), 0, 0, 0);
+}
+
+void MipsAssembler::DsFsmInstrFR(uint32_t instruction, FRegister in1, Register in2) {
+ DsFsmInstr(instruction, 0, (1u << in2), 0, (1u << in1), 0, 0);
+}
+
+void MipsAssembler::DsFsmInstrCff(uint32_t instruction, int cc_out, FRegister in1, FRegister in2) {
+ DsFsmInstr(instruction, 0, 0, 0, (1u << in1) | (1u << in2), (1 << cc_out), 0);
+}
+
+void MipsAssembler::DsFsmInstrRrrc(uint32_t instruction,
+ Register in1_out,
+ Register in2,
+ int cc_in) {
+ DsFsmInstr(instruction, (1u << in1_out), (1u << in1_out) | (1u << in2), 0, 0, 0, (1 << cc_in));
+}
+
+void MipsAssembler::DsFsmInstrFffc(uint32_t instruction,
+ FRegister in1_out,
+ FRegister in2,
+ int cc_in) {
+ DsFsmInstr(instruction, 0, 0, (1u << in1_out), (1u << in1_out) | (1u << in2), 0, (1 << cc_in));
+}
+
void MipsAssembler::FinalizeCode() {
for (auto& exception_block : exception_blocks_) {
EmitExceptionPoll(&exception_block);
}
+ // Commit the last branch target label (if any) and disable instruction reordering.
+ DsFsmCommitLabel();
+ SetReorder(false);
EmitLiterals();
PromoteBranches();
}
@@ -107,6 +292,12 @@
void MipsAssembler::EmitBranches() {
CHECK(!overwriting_);
+ CHECK(!reordering_);
+ // Now that everything has its final position in the buffer (the branches have
+ // been promoted), adjust the target label PCs.
+ for (size_t cnt = ds_fsm_target_pcs_.size(), i = 0; i < cnt; i++) {
+ ds_fsm_target_pcs_[i] = GetAdjustedPosition(ds_fsm_target_pcs_[i]);
+ }
// Switch from appending instructions at the end of the buffer to overwriting
// existing instructions (branch placeholders) in the buffer.
overwriting_ = true;
@@ -128,7 +319,12 @@
}
}
-void MipsAssembler::EmitR(int opcode, Register rs, Register rt, Register rd, int shamt, int funct) {
+uint32_t MipsAssembler::EmitR(int opcode,
+ Register rs,
+ Register rt,
+ Register rd,
+ int shamt,
+ int funct) {
CHECK_NE(rs, kNoRegister);
CHECK_NE(rt, kNoRegister);
CHECK_NE(rd, kNoRegister);
@@ -139,9 +335,10 @@
shamt << kShamtShift |
funct;
Emit(encoding);
+ return encoding;
}
-void MipsAssembler::EmitI(int opcode, Register rs, Register rt, uint16_t imm) {
+uint32_t MipsAssembler::EmitI(int opcode, Register rs, Register rt, uint16_t imm) {
CHECK_NE(rs, kNoRegister);
CHECK_NE(rt, kNoRegister);
uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
@@ -149,25 +346,32 @@
static_cast<uint32_t>(rt) << kRtShift |
imm;
Emit(encoding);
+ return encoding;
}
-void MipsAssembler::EmitI21(int opcode, Register rs, uint32_t imm21) {
+uint32_t MipsAssembler::EmitI21(int opcode, Register rs, uint32_t imm21) {
CHECK_NE(rs, kNoRegister);
CHECK(IsUint<21>(imm21)) << imm21;
uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
static_cast<uint32_t>(rs) << kRsShift |
imm21;
Emit(encoding);
+ return encoding;
}
-void MipsAssembler::EmitI26(int opcode, uint32_t imm26) {
+uint32_t MipsAssembler::EmitI26(int opcode, uint32_t imm26) {
CHECK(IsUint<26>(imm26)) << imm26;
uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26;
Emit(encoding);
+ return encoding;
}
-void MipsAssembler::EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd,
- int funct) {
+uint32_t MipsAssembler::EmitFR(int opcode,
+ int fmt,
+ FRegister ft,
+ FRegister fs,
+ FRegister fd,
+ int funct) {
CHECK_NE(ft, kNoFRegister);
CHECK_NE(fs, kNoFRegister);
CHECK_NE(fd, kNoFRegister);
@@ -178,52 +382,54 @@
static_cast<uint32_t>(fd) << kFdShift |
funct;
Emit(encoding);
+ return encoding;
}
-void MipsAssembler::EmitFI(int opcode, int fmt, FRegister ft, uint16_t imm) {
+uint32_t MipsAssembler::EmitFI(int opcode, int fmt, FRegister ft, uint16_t imm) {
CHECK_NE(ft, kNoFRegister);
uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
fmt << kFmtShift |
static_cast<uint32_t>(ft) << kFtShift |
imm;
Emit(encoding);
+ return encoding;
}
void MipsAssembler::Addu(Register rd, Register rs, Register rt) {
- EmitR(0, rs, rt, rd, 0, 0x21);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x21), rd, rs, rt);
}
void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16) {
- EmitI(0x9, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x9, rs, rt, imm16), rt, rs, rs);
}
void MipsAssembler::Subu(Register rd, Register rs, Register rt) {
- EmitR(0, rs, rt, rd, 0, 0x23);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x23), rd, rs, rt);
}
void MipsAssembler::MultR2(Register rs, Register rt) {
CHECK(!IsR6());
- EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x18);
+ DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x18), ZERO, rs, rt);
}
void MipsAssembler::MultuR2(Register rs, Register rt) {
CHECK(!IsR6());
- EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x19);
+ DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x19), ZERO, rs, rt);
}
void MipsAssembler::DivR2(Register rs, Register rt) {
CHECK(!IsR6());
- EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1a);
+ DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1a), ZERO, rs, rt);
}
void MipsAssembler::DivuR2(Register rs, Register rt) {
CHECK(!IsR6());
- EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1b);
+ DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1b), ZERO, rs, rt);
}
void MipsAssembler::MulR2(Register rd, Register rs, Register rt) {
CHECK(!IsR6());
- EmitR(0x1c, rs, rt, rd, 0, 2);
+ DsFsmInstrRrr(EmitR(0x1c, rs, rt, rd, 0, 2), rd, rs, rt);
}
void MipsAssembler::DivR2(Register rd, Register rs, Register rt) {
@@ -252,308 +458,307 @@
void MipsAssembler::MulR6(Register rd, Register rs, Register rt) {
CHECK(IsR6());
- EmitR(0, rs, rt, rd, 2, 0x18);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 2, 0x18), rd, rs, rt);
}
void MipsAssembler::MuhR6(Register rd, Register rs, Register rt) {
CHECK(IsR6());
- EmitR(0, rs, rt, rd, 3, 0x18);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x18), rd, rs, rt);
}
void MipsAssembler::MuhuR6(Register rd, Register rs, Register rt) {
CHECK(IsR6());
- EmitR(0, rs, rt, rd, 3, 0x19);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x19), rd, rs, rt);
}
void MipsAssembler::DivR6(Register rd, Register rs, Register rt) {
CHECK(IsR6());
- EmitR(0, rs, rt, rd, 2, 0x1a);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 2, 0x1a), rd, rs, rt);
}
void MipsAssembler::ModR6(Register rd, Register rs, Register rt) {
CHECK(IsR6());
- EmitR(0, rs, rt, rd, 3, 0x1a);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x1a), rd, rs, rt);
}
void MipsAssembler::DivuR6(Register rd, Register rs, Register rt) {
CHECK(IsR6());
- EmitR(0, rs, rt, rd, 2, 0x1b);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 2, 0x1b), rd, rs, rt);
}
void MipsAssembler::ModuR6(Register rd, Register rs, Register rt) {
CHECK(IsR6());
- EmitR(0, rs, rt, rd, 3, 0x1b);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x1b), rd, rs, rt);
}
void MipsAssembler::And(Register rd, Register rs, Register rt) {
- EmitR(0, rs, rt, rd, 0, 0x24);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x24), rd, rs, rt);
}
void MipsAssembler::Andi(Register rt, Register rs, uint16_t imm16) {
- EmitI(0xc, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0xc, rs, rt, imm16), rt, rs, rs);
}
void MipsAssembler::Or(Register rd, Register rs, Register rt) {
- EmitR(0, rs, rt, rd, 0, 0x25);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x25), rd, rs, rt);
}
void MipsAssembler::Ori(Register rt, Register rs, uint16_t imm16) {
- EmitI(0xd, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0xd, rs, rt, imm16), rt, rs, rs);
}
void MipsAssembler::Xor(Register rd, Register rs, Register rt) {
- EmitR(0, rs, rt, rd, 0, 0x26);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x26), rd, rs, rt);
}
void MipsAssembler::Xori(Register rt, Register rs, uint16_t imm16) {
- EmitI(0xe, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0xe, rs, rt, imm16), rt, rs, rs);
}
void MipsAssembler::Nor(Register rd, Register rs, Register rt) {
- EmitR(0, rs, rt, rd, 0, 0x27);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x27), rd, rs, rt);
}
void MipsAssembler::Movz(Register rd, Register rs, Register rt) {
CHECK(!IsR6());
- EmitR(0, rs, rt, rd, 0, 0x0A);
+ DsFsmInstrRrrr(EmitR(0, rs, rt, rd, 0, 0x0A), rd, rs, rt);
}
void MipsAssembler::Movn(Register rd, Register rs, Register rt) {
CHECK(!IsR6());
- EmitR(0, rs, rt, rd, 0, 0x0B);
+ DsFsmInstrRrrr(EmitR(0, rs, rt, rd, 0, 0x0B), rd, rs, rt);
}
void MipsAssembler::Seleqz(Register rd, Register rs, Register rt) {
CHECK(IsR6());
- EmitR(0, rs, rt, rd, 0, 0x35);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x35), rd, rs, rt);
}
void MipsAssembler::Selnez(Register rd, Register rs, Register rt) {
CHECK(IsR6());
- EmitR(0, rs, rt, rd, 0, 0x37);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x37), rd, rs, rt);
}
void MipsAssembler::ClzR6(Register rd, Register rs) {
CHECK(IsR6());
- EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x10);
+ DsFsmInstrRrr(EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x10), rd, rs, rs);
}
void MipsAssembler::ClzR2(Register rd, Register rs) {
CHECK(!IsR6());
- EmitR(0x1C, rs, rd, rd, 0, 0x20);
+ DsFsmInstrRrr(EmitR(0x1C, rs, rd, rd, 0, 0x20), rd, rs, rs);
}
void MipsAssembler::CloR6(Register rd, Register rs) {
CHECK(IsR6());
- EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x11);
+ DsFsmInstrRrr(EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x11), rd, rs, rs);
}
void MipsAssembler::CloR2(Register rd, Register rs) {
CHECK(!IsR6());
- EmitR(0x1C, rs, rd, rd, 0, 0x21);
+ DsFsmInstrRrr(EmitR(0x1C, rs, rd, rd, 0, 0x21), rd, rs, rs);
}
void MipsAssembler::Seb(Register rd, Register rt) {
- EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20);
+ DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20), rd, rt, rt);
}
void MipsAssembler::Seh(Register rd, Register rt) {
- EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20);
+ DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20), rd, rt, rt);
}
void MipsAssembler::Wsbh(Register rd, Register rt) {
- EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20);
+ DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20), rd, rt, rt);
}
void MipsAssembler::Bitswap(Register rd, Register rt) {
CHECK(IsR6());
- EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x0, 0x20);
+ DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x0, 0x20), rd, rt, rt);
}
void MipsAssembler::Sll(Register rd, Register rt, int shamt) {
CHECK(IsUint<5>(shamt)) << shamt;
- EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00);
+ DsFsmInstrRrr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00), rd, rt, rt);
}
void MipsAssembler::Srl(Register rd, Register rt, int shamt) {
CHECK(IsUint<5>(shamt)) << shamt;
- EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02);
+ DsFsmInstrRrr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02), rd, rt, rt);
}
void MipsAssembler::Rotr(Register rd, Register rt, int shamt) {
CHECK(IsUint<5>(shamt)) << shamt;
- EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02);
+ DsFsmInstrRrr(EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02), rd, rt, rt);
}
void MipsAssembler::Sra(Register rd, Register rt, int shamt) {
CHECK(IsUint<5>(shamt)) << shamt;
- EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03);
+ DsFsmInstrRrr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03), rd, rt, rt);
}
void MipsAssembler::Sllv(Register rd, Register rt, Register rs) {
- EmitR(0, rs, rt, rd, 0, 0x04);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x04), rd, rs, rt);
}
void MipsAssembler::Srlv(Register rd, Register rt, Register rs) {
- EmitR(0, rs, rt, rd, 0, 0x06);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x06), rd, rs, rt);
}
void MipsAssembler::Rotrv(Register rd, Register rt, Register rs) {
- EmitR(0, rs, rt, rd, 1, 0x06);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 1, 0x06), rd, rs, rt);
}
void MipsAssembler::Srav(Register rd, Register rt, Register rs) {
- EmitR(0, rs, rt, rd, 0, 0x07);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x07), rd, rs, rt);
}
void MipsAssembler::Ext(Register rd, Register rt, int pos, int size) {
CHECK(IsUint<5>(pos)) << pos;
CHECK(0 < size && size <= 32) << size;
CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size;
- EmitR(0x1f, rt, rd, static_cast<Register>(size - 1), pos, 0x00);
+ DsFsmInstrRrr(EmitR(0x1f, rt, rd, static_cast<Register>(size - 1), pos, 0x00), rd, rt, rt);
}
void MipsAssembler::Ins(Register rd, Register rt, int pos, int size) {
CHECK(IsUint<5>(pos)) << pos;
CHECK(0 < size && size <= 32) << size;
CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size;
- EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04);
+ DsFsmInstrRrr(EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04), rd, rd, rt);
}
void MipsAssembler::Lb(Register rt, Register rs, uint16_t imm16) {
- EmitI(0x20, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x20, rs, rt, imm16), rt, rs, rs);
}
void MipsAssembler::Lh(Register rt, Register rs, uint16_t imm16) {
- EmitI(0x21, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x21, rs, rt, imm16), rt, rs, rs);
}
void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16) {
- EmitI(0x23, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x23, rs, rt, imm16), rt, rs, rs);
}
void MipsAssembler::Lwl(Register rt, Register rs, uint16_t imm16) {
CHECK(!IsR6());
- EmitI(0x22, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x22, rs, rt, imm16), rt, rt, rs);
}
void MipsAssembler::Lwr(Register rt, Register rs, uint16_t imm16) {
CHECK(!IsR6());
- EmitI(0x26, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x26, rs, rt, imm16), rt, rt, rs);
}
void MipsAssembler::Lbu(Register rt, Register rs, uint16_t imm16) {
- EmitI(0x24, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x24, rs, rt, imm16), rt, rs, rs);
}
void MipsAssembler::Lhu(Register rt, Register rs, uint16_t imm16) {
- EmitI(0x25, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x25, rs, rt, imm16), rt, rs, rs);
}
void MipsAssembler::Lwpc(Register rs, uint32_t imm19) {
CHECK(IsR6());
CHECK(IsUint<19>(imm19)) << imm19;
- EmitI21(0x3B, rs, (0x01 << 19) | imm19);
+ DsFsmInstrNop(EmitI21(0x3B, rs, (0x01 << 19) | imm19));
}
void MipsAssembler::Lui(Register rt, uint16_t imm16) {
- EmitI(0xf, static_cast<Register>(0), rt, imm16);
+ DsFsmInstrRrr(EmitI(0xf, static_cast<Register>(0), rt, imm16), rt, ZERO, ZERO);
}
void MipsAssembler::Aui(Register rt, Register rs, uint16_t imm16) {
CHECK(IsR6());
- EmitI(0xf, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0xf, rs, rt, imm16), rt, rt, rs);
}
void MipsAssembler::Sync(uint32_t stype) {
- EmitR(0, static_cast<Register>(0), static_cast<Register>(0), static_cast<Register>(0),
- stype & 0x1f, 0xf);
+ DsFsmInstrNop(EmitR(0, ZERO, ZERO, ZERO, stype & 0x1f, 0xf));
}
void MipsAssembler::Mfhi(Register rd) {
CHECK(!IsR6());
- EmitR(0, static_cast<Register>(0), static_cast<Register>(0), rd, 0, 0x10);
+ DsFsmInstrRrr(EmitR(0, ZERO, ZERO, rd, 0, 0x10), rd, ZERO, ZERO);
}
void MipsAssembler::Mflo(Register rd) {
CHECK(!IsR6());
- EmitR(0, static_cast<Register>(0), static_cast<Register>(0), rd, 0, 0x12);
+ DsFsmInstrRrr(EmitR(0, ZERO, ZERO, rd, 0, 0x12), rd, ZERO, ZERO);
}
void MipsAssembler::Sb(Register rt, Register rs, uint16_t imm16) {
- EmitI(0x28, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x28, rs, rt, imm16), ZERO, rt, rs);
}
void MipsAssembler::Sh(Register rt, Register rs, uint16_t imm16) {
- EmitI(0x29, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x29, rs, rt, imm16), ZERO, rt, rs);
}
void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16) {
- EmitI(0x2b, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x2b, rs, rt, imm16), ZERO, rt, rs);
}
void MipsAssembler::Swl(Register rt, Register rs, uint16_t imm16) {
CHECK(!IsR6());
- EmitI(0x2a, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x2a, rs, rt, imm16), ZERO, rt, rs);
}
void MipsAssembler::Swr(Register rt, Register rs, uint16_t imm16) {
CHECK(!IsR6());
- EmitI(0x2e, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x2e, rs, rt, imm16), ZERO, rt, rs);
}
void MipsAssembler::LlR2(Register rt, Register base, int16_t imm16) {
CHECK(!IsR6());
- EmitI(0x30, base, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x30, base, rt, imm16), rt, base, base);
}
void MipsAssembler::ScR2(Register rt, Register base, int16_t imm16) {
CHECK(!IsR6());
- EmitI(0x38, base, rt, imm16);
+ DsFsmInstrRrr(EmitI(0x38, base, rt, imm16), rt, rt, base);
}
void MipsAssembler::LlR6(Register rt, Register base, int16_t imm9) {
CHECK(IsR6());
CHECK(IsInt<9>(imm9));
- EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x36);
+ DsFsmInstrRrr(EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x36), rt, base, base);
}
void MipsAssembler::ScR6(Register rt, Register base, int16_t imm9) {
CHECK(IsR6());
CHECK(IsInt<9>(imm9));
- EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x26);
+ DsFsmInstrRrr(EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x26), rt, rt, base);
}
void MipsAssembler::Slt(Register rd, Register rs, Register rt) {
- EmitR(0, rs, rt, rd, 0, 0x2a);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x2a), rd, rs, rt);
}
void MipsAssembler::Sltu(Register rd, Register rs, Register rt) {
- EmitR(0, rs, rt, rd, 0, 0x2b);
+ DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x2b), rd, rs, rt);
}
void MipsAssembler::Slti(Register rt, Register rs, uint16_t imm16) {
- EmitI(0xa, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0xa, rs, rt, imm16), rt, rs, rs);
}
void MipsAssembler::Sltiu(Register rt, Register rs, uint16_t imm16) {
- EmitI(0xb, rs, rt, imm16);
+ DsFsmInstrRrr(EmitI(0xb, rs, rt, imm16), rt, rs, rs);
}
void MipsAssembler::B(uint16_t imm16) {
- EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16);
+ DsFsmInstrNop(EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16));
}
void MipsAssembler::Bal(uint16_t imm16) {
- EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x11), imm16);
+ DsFsmInstrNop(EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x11), imm16));
}
void MipsAssembler::Beq(Register rs, Register rt, uint16_t imm16) {
- EmitI(0x4, rs, rt, imm16);
+ DsFsmInstrNop(EmitI(0x4, rs, rt, imm16));
}
void MipsAssembler::Bne(Register rs, Register rt, uint16_t imm16) {
- EmitI(0x5, rs, rt, imm16);
+ DsFsmInstrNop(EmitI(0x5, rs, rt, imm16));
}
void MipsAssembler::Beqz(Register rt, uint16_t imm16) {
@@ -565,19 +770,19 @@
}
void MipsAssembler::Bltz(Register rt, uint16_t imm16) {
- EmitI(0x1, rt, static_cast<Register>(0), imm16);
+ DsFsmInstrNop(EmitI(0x1, rt, static_cast<Register>(0), imm16));
}
void MipsAssembler::Bgez(Register rt, uint16_t imm16) {
- EmitI(0x1, rt, static_cast<Register>(0x1), imm16);
+ DsFsmInstrNop(EmitI(0x1, rt, static_cast<Register>(0x1), imm16));
}
void MipsAssembler::Blez(Register rt, uint16_t imm16) {
- EmitI(0x6, rt, static_cast<Register>(0), imm16);
+ DsFsmInstrNop(EmitI(0x6, rt, static_cast<Register>(0), imm16));
}
void MipsAssembler::Bgtz(Register rt, uint16_t imm16) {
- EmitI(0x7, rt, static_cast<Register>(0), imm16);
+ DsFsmInstrNop(EmitI(0x7, rt, static_cast<Register>(0), imm16));
}
void MipsAssembler::Bc1f(uint16_t imm16) {
@@ -587,7 +792,7 @@
void MipsAssembler::Bc1f(int cc, uint16_t imm16) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>(cc << 2), imm16);
+ DsFsmInstrNop(EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>(cc << 2), imm16));
}
void MipsAssembler::Bc1t(uint16_t imm16) {
@@ -597,19 +802,45 @@
void MipsAssembler::Bc1t(int cc, uint16_t imm16) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>((cc << 2) | 1), imm16);
+ DsFsmInstrNop(EmitI(0x11,
+ static_cast<Register>(0x8),
+ static_cast<Register>((cc << 2) | 1),
+ imm16));
}
void MipsAssembler::J(uint32_t addr26) {
- EmitI26(0x2, addr26);
+ DsFsmInstrNop(EmitI26(0x2, addr26));
}
void MipsAssembler::Jal(uint32_t addr26) {
- EmitI26(0x3, addr26);
+ DsFsmInstrNop(EmitI26(0x3, addr26));
}
void MipsAssembler::Jalr(Register rd, Register rs) {
- EmitR(0, rs, static_cast<Register>(0), rd, 0, 0x09);
+ uint32_t last_instruction = delay_slot_.instruction_;
+ bool exchange = (last_instruction != 0 &&
+ (delay_slot_.gpr_outs_mask_ & (1u << rs)) == 0 &&
+ ((delay_slot_.gpr_ins_mask_ | delay_slot_.gpr_outs_mask_) & (1u << rd)) == 0);
+ if (exchange) {
+ // The last instruction is moved into this jump's delay slot and so cannot
+ // fill any other delay slot: do not commit the label before it (if any).
+ DsFsmDropLabel();
+ }
+ DsFsmInstrNop(EmitR(0, rs, static_cast<Register>(0), rd, 0, 0x09));
+ if (exchange) {
+ // Exchange the last two instructions in the assembler buffer.
+ size_t size = buffer_.Size();
+ CHECK_GE(size, 2 * sizeof(uint32_t));
+ size_t pos1 = size - 2 * sizeof(uint32_t);
+ size_t pos2 = size - sizeof(uint32_t);
+ uint32_t instr1 = buffer_.Load<uint32_t>(pos1);
+ uint32_t instr2 = buffer_.Load<uint32_t>(pos2);
+ CHECK_EQ(instr1, last_instruction);
+ buffer_.Store<uint32_t>(pos1, instr2);
+ buffer_.Store<uint32_t>(pos2, instr1);
+ } else if (reordering_) {
+ Nop();
+ }
}
void MipsAssembler::Jalr(Register rs) {
@@ -621,38 +852,38 @@
}
void MipsAssembler::Nal() {
- EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x10), 0);
+ DsFsmInstrNop(EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x10), 0));
}
void MipsAssembler::Auipc(Register rs, uint16_t imm16) {
CHECK(IsR6());
- EmitI(0x3B, rs, static_cast<Register>(0x1E), imm16);
+ DsFsmInstrNop(EmitI(0x3B, rs, static_cast<Register>(0x1E), imm16));
}
void MipsAssembler::Addiupc(Register rs, uint32_t imm19) {
CHECK(IsR6());
CHECK(IsUint<19>(imm19)) << imm19;
- EmitI21(0x3B, rs, imm19);
+ DsFsmInstrNop(EmitI21(0x3B, rs, imm19));
}
void MipsAssembler::Bc(uint32_t imm26) {
CHECK(IsR6());
- EmitI26(0x32, imm26);
+ DsFsmInstrNop(EmitI26(0x32, imm26));
}
void MipsAssembler::Balc(uint32_t imm26) {
CHECK(IsR6());
- EmitI26(0x3A, imm26);
+ DsFsmInstrNop(EmitI26(0x3A, imm26));
}
void MipsAssembler::Jic(Register rt, uint16_t imm16) {
CHECK(IsR6());
- EmitI(0x36, static_cast<Register>(0), rt, imm16);
+ DsFsmInstrNop(EmitI(0x36, static_cast<Register>(0), rt, imm16));
}
void MipsAssembler::Jialc(Register rt, uint16_t imm16) {
CHECK(IsR6());
- EmitI(0x3E, static_cast<Register>(0), rt, imm16);
+ DsFsmInstrNop(EmitI(0x3E, static_cast<Register>(0), rt, imm16));
}
void MipsAssembler::Bltc(Register rs, Register rt, uint16_t imm16) {
@@ -660,19 +891,19 @@
CHECK_NE(rs, ZERO);
CHECK_NE(rt, ZERO);
CHECK_NE(rs, rt);
- EmitI(0x17, rs, rt, imm16);
+ DsFsmInstrNop(EmitI(0x17, rs, rt, imm16));
}
void MipsAssembler::Bltzc(Register rt, uint16_t imm16) {
CHECK(IsR6());
CHECK_NE(rt, ZERO);
- EmitI(0x17, rt, rt, imm16);
+ DsFsmInstrNop(EmitI(0x17, rt, rt, imm16));
}
void MipsAssembler::Bgtzc(Register rt, uint16_t imm16) {
CHECK(IsR6());
CHECK_NE(rt, ZERO);
- EmitI(0x17, static_cast<Register>(0), rt, imm16);
+ DsFsmInstrNop(EmitI(0x17, static_cast<Register>(0), rt, imm16));
}
void MipsAssembler::Bgec(Register rs, Register rt, uint16_t imm16) {
@@ -680,19 +911,19 @@
CHECK_NE(rs, ZERO);
CHECK_NE(rt, ZERO);
CHECK_NE(rs, rt);
- EmitI(0x16, rs, rt, imm16);
+ DsFsmInstrNop(EmitI(0x16, rs, rt, imm16));
}
void MipsAssembler::Bgezc(Register rt, uint16_t imm16) {
CHECK(IsR6());
CHECK_NE(rt, ZERO);
- EmitI(0x16, rt, rt, imm16);
+ DsFsmInstrNop(EmitI(0x16, rt, rt, imm16));
}
void MipsAssembler::Blezc(Register rt, uint16_t imm16) {
CHECK(IsR6());
CHECK_NE(rt, ZERO);
- EmitI(0x16, static_cast<Register>(0), rt, imm16);
+ DsFsmInstrNop(EmitI(0x16, static_cast<Register>(0), rt, imm16));
}
void MipsAssembler::Bltuc(Register rs, Register rt, uint16_t imm16) {
@@ -700,7 +931,7 @@
CHECK_NE(rs, ZERO);
CHECK_NE(rt, ZERO);
CHECK_NE(rs, rt);
- EmitI(0x7, rs, rt, imm16);
+ DsFsmInstrNop(EmitI(0x7, rs, rt, imm16));
}
void MipsAssembler::Bgeuc(Register rs, Register rt, uint16_t imm16) {
@@ -708,7 +939,7 @@
CHECK_NE(rs, ZERO);
CHECK_NE(rt, ZERO);
CHECK_NE(rs, rt);
- EmitI(0x6, rs, rt, imm16);
+ DsFsmInstrNop(EmitI(0x6, rs, rt, imm16));
}
void MipsAssembler::Beqc(Register rs, Register rt, uint16_t imm16) {
@@ -716,7 +947,7 @@
CHECK_NE(rs, ZERO);
CHECK_NE(rt, ZERO);
CHECK_NE(rs, rt);
- EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16);
+ DsFsmInstrNop(EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16));
}
void MipsAssembler::Bnec(Register rs, Register rt, uint16_t imm16) {
@@ -724,29 +955,29 @@
CHECK_NE(rs, ZERO);
CHECK_NE(rt, ZERO);
CHECK_NE(rs, rt);
- EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16);
+ DsFsmInstrNop(EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16));
}
void MipsAssembler::Beqzc(Register rs, uint32_t imm21) {
CHECK(IsR6());
CHECK_NE(rs, ZERO);
- EmitI21(0x36, rs, imm21);
+ DsFsmInstrNop(EmitI21(0x36, rs, imm21));
}
void MipsAssembler::Bnezc(Register rs, uint32_t imm21) {
CHECK(IsR6());
CHECK_NE(rs, ZERO);
- EmitI21(0x3E, rs, imm21);
+ DsFsmInstrNop(EmitI21(0x3E, rs, imm21));
}
void MipsAssembler::Bc1eqz(FRegister ft, uint16_t imm16) {
CHECK(IsR6());
- EmitFI(0x11, 0x9, ft, imm16);
+ DsFsmInstrNop(EmitFI(0x11, 0x9, ft, imm16));
}
void MipsAssembler::Bc1nez(FRegister ft, uint16_t imm16) {
CHECK(IsR6());
- EmitFI(0x11, 0xD, ft, imm16);
+ DsFsmInstrNop(EmitFI(0x11, 0xD, ft, imm16));
}
void MipsAssembler::EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16) {
@@ -868,67 +1099,67 @@
}
void MipsAssembler::AddS(FRegister fd, FRegister fs, FRegister ft) {
- EmitFR(0x11, 0x10, ft, fs, fd, 0x0);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x0), fd, fs, ft);
}
void MipsAssembler::SubS(FRegister fd, FRegister fs, FRegister ft) {
- EmitFR(0x11, 0x10, ft, fs, fd, 0x1);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x1), fd, fs, ft);
}
void MipsAssembler::MulS(FRegister fd, FRegister fs, FRegister ft) {
- EmitFR(0x11, 0x10, ft, fs, fd, 0x2);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x2), fd, fs, ft);
}
void MipsAssembler::DivS(FRegister fd, FRegister fs, FRegister ft) {
- EmitFR(0x11, 0x10, ft, fs, fd, 0x3);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x3), fd, fs, ft);
}
void MipsAssembler::AddD(FRegister fd, FRegister fs, FRegister ft) {
- EmitFR(0x11, 0x11, ft, fs, fd, 0x0);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x0), fd, fs, ft);
}
void MipsAssembler::SubD(FRegister fd, FRegister fs, FRegister ft) {
- EmitFR(0x11, 0x11, ft, fs, fd, 0x1);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x1), fd, fs, ft);
}
void MipsAssembler::MulD(FRegister fd, FRegister fs, FRegister ft) {
- EmitFR(0x11, 0x11, ft, fs, fd, 0x2);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x2), fd, fs, ft);
}
void MipsAssembler::DivD(FRegister fd, FRegister fs, FRegister ft) {
- EmitFR(0x11, 0x11, ft, fs, fd, 0x3);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x3), fd, fs, ft);
}
void MipsAssembler::SqrtS(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x4);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x4), fd, fs, fs);
}
void MipsAssembler::SqrtD(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x4);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x4), fd, fs, fs);
}
void MipsAssembler::AbsS(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x5);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x5), fd, fs, fs);
}
void MipsAssembler::AbsD(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x5);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x5), fd, fs, fs);
}
void MipsAssembler::MovS(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x6);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x6), fd, fs, fs);
}
void MipsAssembler::MovD(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x6);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x6), fd, fs, fs);
}
void MipsAssembler::NegS(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x7);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x7), fd, fs, fs);
}
void MipsAssembler::NegD(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7), fd, fs, fs);
}
void MipsAssembler::CunS(FRegister fs, FRegister ft) {
@@ -938,7 +1169,7 @@
void MipsAssembler::CunS(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x31);
+ DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x31), cc, fs, ft);
}
void MipsAssembler::CeqS(FRegister fs, FRegister ft) {
@@ -948,7 +1179,7 @@
void MipsAssembler::CeqS(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x32);
+ DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x32), cc, fs, ft);
}
void MipsAssembler::CueqS(FRegister fs, FRegister ft) {
@@ -958,7 +1189,7 @@
void MipsAssembler::CueqS(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x33);
+ DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x33), cc, fs, ft);
}
void MipsAssembler::ColtS(FRegister fs, FRegister ft) {
@@ -968,7 +1199,7 @@
void MipsAssembler::ColtS(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x34);
+ DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x34), cc, fs, ft);
}
void MipsAssembler::CultS(FRegister fs, FRegister ft) {
@@ -978,7 +1209,7 @@
void MipsAssembler::CultS(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x35);
+ DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x35), cc, fs, ft);
}
void MipsAssembler::ColeS(FRegister fs, FRegister ft) {
@@ -988,7 +1219,7 @@
void MipsAssembler::ColeS(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x36);
+ DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x36), cc, fs, ft);
}
void MipsAssembler::CuleS(FRegister fs, FRegister ft) {
@@ -998,7 +1229,7 @@
void MipsAssembler::CuleS(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x37);
+ DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x37), cc, fs, ft);
}
void MipsAssembler::CunD(FRegister fs, FRegister ft) {
@@ -1008,7 +1239,7 @@
void MipsAssembler::CunD(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x31);
+ DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x31), cc, fs, ft);
}
void MipsAssembler::CeqD(FRegister fs, FRegister ft) {
@@ -1018,7 +1249,7 @@
void MipsAssembler::CeqD(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x32);
+ DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x32), cc, fs, ft);
}
void MipsAssembler::CueqD(FRegister fs, FRegister ft) {
@@ -1028,7 +1259,7 @@
void MipsAssembler::CueqD(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x33);
+ DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x33), cc, fs, ft);
}
void MipsAssembler::ColtD(FRegister fs, FRegister ft) {
@@ -1038,7 +1269,7 @@
void MipsAssembler::ColtD(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x34);
+ DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x34), cc, fs, ft);
}
void MipsAssembler::CultD(FRegister fs, FRegister ft) {
@@ -1048,7 +1279,7 @@
void MipsAssembler::CultD(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x35);
+ DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x35), cc, fs, ft);
}
void MipsAssembler::ColeD(FRegister fs, FRegister ft) {
@@ -1058,7 +1289,7 @@
void MipsAssembler::ColeD(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x36);
+ DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x36), cc, fs, ft);
}
void MipsAssembler::CuleD(FRegister fs, FRegister ft) {
@@ -1068,247 +1299,261 @@
void MipsAssembler::CuleD(int cc, FRegister fs, FRegister ft) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x37);
+ DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x37), cc, fs, ft);
}
void MipsAssembler::CmpUnS(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x14, ft, fs, fd, 0x01);
+ DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x01), fd, fs, ft);
}
void MipsAssembler::CmpEqS(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x14, ft, fs, fd, 0x02);
+ DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x02), fd, fs, ft);
}
void MipsAssembler::CmpUeqS(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x14, ft, fs, fd, 0x03);
+ DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x03), fd, fs, ft);
}
void MipsAssembler::CmpLtS(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x14, ft, fs, fd, 0x04);
+ DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x04), fd, fs, ft);
}
void MipsAssembler::CmpUltS(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x14, ft, fs, fd, 0x05);
+ DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x05), fd, fs, ft);
}
void MipsAssembler::CmpLeS(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x14, ft, fs, fd, 0x06);
+ DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x06), fd, fs, ft);
}
void MipsAssembler::CmpUleS(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x14, ft, fs, fd, 0x07);
+ DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x07), fd, fs, ft);
}
void MipsAssembler::CmpOrS(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x14, ft, fs, fd, 0x11);
+ DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x11), fd, fs, ft);
}
void MipsAssembler::CmpUneS(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x14, ft, fs, fd, 0x12);
+ DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x12), fd, fs, ft);
}
void MipsAssembler::CmpNeS(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x14, ft, fs, fd, 0x13);
+ DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x13), fd, fs, ft);
}
void MipsAssembler::CmpUnD(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x15, ft, fs, fd, 0x01);
+ DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x01), fd, fs, ft);
}
void MipsAssembler::CmpEqD(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x15, ft, fs, fd, 0x02);
+ DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x02), fd, fs, ft);
}
void MipsAssembler::CmpUeqD(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x15, ft, fs, fd, 0x03);
+ DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x03), fd, fs, ft);
}
void MipsAssembler::CmpLtD(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x15, ft, fs, fd, 0x04);
+ DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x04), fd, fs, ft);
}
void MipsAssembler::CmpUltD(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x15, ft, fs, fd, 0x05);
+ DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x05), fd, fs, ft);
}
void MipsAssembler::CmpLeD(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x15, ft, fs, fd, 0x06);
+ DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x06), fd, fs, ft);
}
void MipsAssembler::CmpUleD(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x15, ft, fs, fd, 0x07);
+ DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x07), fd, fs, ft);
}
void MipsAssembler::CmpOrD(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x15, ft, fs, fd, 0x11);
+ DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x11), fd, fs, ft);
}
void MipsAssembler::CmpUneD(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x15, ft, fs, fd, 0x12);
+ DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x12), fd, fs, ft);
}
void MipsAssembler::CmpNeD(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x15, ft, fs, fd, 0x13);
+ DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x13), fd, fs, ft);
}
void MipsAssembler::Movf(Register rd, Register rs, int cc) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitR(0, rs, static_cast<Register>(cc << 2), rd, 0, 0x01);
+ DsFsmInstrRrrc(EmitR(0, rs, static_cast<Register>(cc << 2), rd, 0, 0x01), rd, rs, cc);
}
void MipsAssembler::Movt(Register rd, Register rs, int cc) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01);
+ DsFsmInstrRrrc(EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01), rd, rs, cc);
}
void MipsAssembler::MovfS(FRegister fd, FRegister fs, int cc) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x10, static_cast<FRegister>(cc << 2), fs, fd, 0x11);
+ DsFsmInstrFffc(EmitFR(0x11, 0x10, static_cast<FRegister>(cc << 2), fs, fd, 0x11), fd, fs, cc);
}
void MipsAssembler::MovfD(FRegister fd, FRegister fs, int cc) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x11, static_cast<FRegister>(cc << 2), fs, fd, 0x11);
+ DsFsmInstrFffc(EmitFR(0x11, 0x11, static_cast<FRegister>(cc << 2), fs, fd, 0x11), fd, fs, cc);
}
void MipsAssembler::MovtS(FRegister fd, FRegister fs, int cc) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x10, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11);
+ DsFsmInstrFffc(EmitFR(0x11, 0x10, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11),
+ fd,
+ fs,
+ cc);
}
void MipsAssembler::MovtD(FRegister fd, FRegister fs, int cc) {
CHECK(!IsR6());
CHECK(IsUint<3>(cc)) << cc;
- EmitFR(0x11, 0x11, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11);
+ DsFsmInstrFffc(EmitFR(0x11, 0x11, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11),
+ fd,
+ fs,
+ cc);
}
void MipsAssembler::SelS(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x10, ft, fs, fd, 0x10);
+ DsFsmInstrFfff(EmitFR(0x11, 0x10, ft, fs, fd, 0x10), fd, fs, ft);
}
void MipsAssembler::SelD(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x11, ft, fs, fd, 0x10);
+ DsFsmInstrFfff(EmitFR(0x11, 0x11, ft, fs, fd, 0x10), fd, fs, ft);
}
void MipsAssembler::ClassS(FRegister fd, FRegister fs) {
CHECK(IsR6());
- EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x1b);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x1b), fd, fs, fs);
}
void MipsAssembler::ClassD(FRegister fd, FRegister fs) {
CHECK(IsR6());
- EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x1b);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x1b), fd, fs, fs);
}
void MipsAssembler::MinS(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x10, ft, fs, fd, 0x1c);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x1c), fd, fs, ft);
}
void MipsAssembler::MinD(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x11, ft, fs, fd, 0x1c);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x1c), fd, fs, ft);
}
void MipsAssembler::MaxS(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x10, ft, fs, fd, 0x1e);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x1e), fd, fs, ft);
}
void MipsAssembler::MaxD(FRegister fd, FRegister fs, FRegister ft) {
CHECK(IsR6());
- EmitFR(0x11, 0x11, ft, fs, fd, 0x1e);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x1e), fd, fs, ft);
}
void MipsAssembler::TruncLS(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09), fd, fs, fs);
}
void MipsAssembler::TruncLD(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09), fd, fs, fs);
}
void MipsAssembler::TruncWS(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D), fd, fs, fs);
}
void MipsAssembler::TruncWD(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D), fd, fs, fs);
}
void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20);
+ DsFsmInstrFff(EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20), fd, fs, fs);
}
void MipsAssembler::Cvtdw(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x21);
+ DsFsmInstrFff(EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x21), fd, fs, fs);
}
void MipsAssembler::Cvtsd(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x20);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x20), fd, fs, fs);
}
void MipsAssembler::Cvtds(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21), fd, fs, fs);
}
void MipsAssembler::Cvtsl(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20);
+ DsFsmInstrFff(EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20), fd, fs, fs);
}
void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21);
+ DsFsmInstrFff(EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21), fd, fs, fs);
}
void MipsAssembler::FloorWS(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0xf);
+ DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0xf), fd, fs, fs);
}
void MipsAssembler::FloorWD(FRegister fd, FRegister fs) {
- EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0xf);
+ DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0xf), fd, fs, fs);
}
void MipsAssembler::Mfc1(Register rt, FRegister fs) {
- EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
+ DsFsmInstrRf(EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0),
+ rt,
+ fs);
}
void MipsAssembler::Mtc1(Register rt, FRegister fs) {
- EmitFR(0x11, 0x04, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
+ DsFsmInstrFr(EmitFR(0x11, 0x04, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0),
+ fs,
+ rt);
}
void MipsAssembler::Mfhc1(Register rt, FRegister fs) {
- EmitFR(0x11, 0x03, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
+ DsFsmInstrRf(EmitFR(0x11, 0x03, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0),
+ rt,
+ fs);
}
void MipsAssembler::Mthc1(Register rt, FRegister fs) {
- EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
+ DsFsmInstrFr(EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0),
+ fs,
+ rt);
}
void MipsAssembler::MoveFromFpuHigh(Register rt, FRegister fs) {
@@ -1330,28 +1575,33 @@
}
void MipsAssembler::Lwc1(FRegister ft, Register rs, uint16_t imm16) {
- EmitI(0x31, rs, static_cast<Register>(ft), imm16);
+ DsFsmInstrFr(EmitI(0x31, rs, static_cast<Register>(ft), imm16), ft, rs);
}
void MipsAssembler::Ldc1(FRegister ft, Register rs, uint16_t imm16) {
- EmitI(0x35, rs, static_cast<Register>(ft), imm16);
+ DsFsmInstrFr(EmitI(0x35, rs, static_cast<Register>(ft), imm16), ft, rs);
}
void MipsAssembler::Swc1(FRegister ft, Register rs, uint16_t imm16) {
- EmitI(0x39, rs, static_cast<Register>(ft), imm16);
+ DsFsmInstrFR(EmitI(0x39, rs, static_cast<Register>(ft), imm16), ft, rs);
}
void MipsAssembler::Sdc1(FRegister ft, Register rs, uint16_t imm16) {
- EmitI(0x3d, rs, static_cast<Register>(ft), imm16);
+ DsFsmInstrFR(EmitI(0x3d, rs, static_cast<Register>(ft), imm16), ft, rs);
}
void MipsAssembler::Break() {
- EmitR(0, static_cast<Register>(0), static_cast<Register>(0),
- static_cast<Register>(0), 0, 0xD);
+ DsFsmInstrNop(EmitR(0, ZERO, ZERO, ZERO, 0, 0xD));
}
void MipsAssembler::Nop() {
- EmitR(0x0, static_cast<Register>(0), static_cast<Register>(0), static_cast<Register>(0), 0, 0x0);
+ DsFsmInstrNop(EmitR(0x0, ZERO, ZERO, ZERO, 0, 0x0));
+}
+
+void MipsAssembler::NopIfNoReordering() {
+ if (!reordering_) {
+ Nop();
+ }
}
void MipsAssembler::Move(Register rd, Register rs) {
@@ -1377,9 +1627,11 @@
}
void MipsAssembler::PopAndReturn(Register rd, Register rt) {
+ bool reordering = SetReorder(false);
Lw(rd, SP, 0);
Jr(rt);
- DecreaseFrameSize(kMipsWordSize);
+ DecreaseFrameSize(kMipsWordSize); // Single instruction in delay slot.
+ SetReorder(reordering);
}
void MipsAssembler::LoadConst32(Register rd, int32_t value) {
@@ -1550,7 +1802,8 @@
target_(target),
lhs_reg_(0),
rhs_reg_(0),
- condition_(kUncond) {
+ condition_(kUncond),
+ delayed_instruction_(kUnfilledDelaySlot) {
InitializeType(is_call, /* is_literal */ false, is_r6);
}
@@ -1565,7 +1818,8 @@
target_(target),
lhs_reg_(lhs_reg),
rhs_reg_(rhs_reg),
- condition_(condition) {
+ condition_(condition),
+ delayed_instruction_(kUnfilledDelaySlot) {
CHECK_NE(condition, kUncond);
switch (condition) {
case kCondLT:
@@ -1617,7 +1871,8 @@
target_(kUnresolved),
lhs_reg_(dest_reg),
rhs_reg_(base_reg),
- condition_(kUncond) {
+ condition_(kUncond),
+ delayed_instruction_(kUnfilledDelaySlot) {
CHECK_NE(dest_reg, ZERO);
if (is_r6) {
CHECK_EQ(base_reg, ZERO);
@@ -1696,12 +1951,38 @@
return old_location_;
}
+uint32_t MipsAssembler::Branch::GetPrecedingInstructionLength(Type type) const {
+ // Short branches with delay slots always consist of two instructions, the branch
+ // and the delay slot, irrespective of whether the delay slot is filled with a
+ // useful instruction or not.
+ // Long composite branches may have a length longer by one instruction than
+ // specified in branch_info_[].length. This happens when an instruction is taken
+ // to fill the short branch delay slot, but the branch eventually becomes long
+ // and formally has no delay slot to fill. This instruction is placed at the
+ // beginning of the long composite branch and this needs to be accounted for in
+ // the branch length and the location of the offset encoded in the branch.
+ switch (type) {
+ case kLongUncondBranch:
+ case kLongCondBranch:
+ case kLongCall:
+ case kR6LongCondBranch:
+ return (delayed_instruction_ != kUnfilledDelaySlot &&
+ delayed_instruction_ != kUnfillableDelaySlot) ? 1 : 0;
+ default:
+ return 0;
+ }
+}
+
+uint32_t MipsAssembler::Branch::GetPrecedingInstructionSize(Type type) const {
+ return GetPrecedingInstructionLength(type) * sizeof(uint32_t);
+}
+
uint32_t MipsAssembler::Branch::GetLength() const {
- return branch_info_[type_].length;
+ return GetPrecedingInstructionLength(type_) + branch_info_[type_].length;
}
uint32_t MipsAssembler::Branch::GetOldLength() const {
- return branch_info_[old_type_].length;
+ return GetPrecedingInstructionLength(old_type_) + branch_info_[old_type_].length;
}
uint32_t MipsAssembler::Branch::GetSize() const {
@@ -1883,7 +2164,8 @@
}
uint32_t MipsAssembler::Branch::GetOffsetLocation() const {
- return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t);
+ return location_ + GetPrecedingInstructionSize(type_) +
+ branch_info_[type_].instr_offset * sizeof(uint32_t);
}
uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const {
@@ -1925,6 +2207,9 @@
CHECK(!label->IsBound());
uint32_t bound_pc = buffer_.Size();
+ // Make the delay slot FSM aware of the new label.
+ DsFsmLabel();
+
// Walk the list of branches referring to and preceding this label.
// Store the previously unknown target addresses in them.
while (label->IsLinked()) {
@@ -1997,11 +2282,15 @@
void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) {
uint32_t length = branches_.back().GetLength();
+ // Commit the last branch target label (if any).
+ DsFsmCommitLabel();
if (!label->IsBound()) {
// Branch forward (to a following label), distance is unknown.
// The first branch forward will contain 0, serving as the terminator of
// the list of forward-reaching branches.
Emit(label->position_);
+ // Nothing for the delay slot (yet).
+ DsFsmInstrNop(0);
length--;
// Now make the label object point to this branch
// (this forms a linked list of branches preceding this label).
@@ -2014,9 +2303,139 @@
}
}
+bool MipsAssembler::Branch::CanHaveDelayedInstruction(const DelaySlot& delay_slot) const {
+ if (delay_slot.instruction_ == 0) {
+ // NOP or no instruction for the delay slot.
+ return false;
+ }
+ switch (type_) {
+ // R2 unconditional branches.
+ case kUncondBranch:
+ case kLongUncondBranch:
+ // There are no register interdependencies.
+ return true;
+
+ // R2 calls.
+ case kCall:
+ case kLongCall:
+ // Instructions depending on or modifying RA should not be moved into delay slots
+ // of branches modifying RA.
+ return ((delay_slot.gpr_ins_mask_ | delay_slot.gpr_outs_mask_) & (1u << RA)) == 0;
+
+ // R2 conditional branches.
+ case kCondBranch:
+ case kLongCondBranch:
+ switch (condition_) {
+ // Branches with one GPR source.
+ case kCondLTZ:
+ case kCondGEZ:
+ case kCondLEZ:
+ case kCondGTZ:
+ case kCondEQZ:
+ case kCondNEZ:
+ return (delay_slot.gpr_outs_mask_ & (1u << lhs_reg_)) == 0;
+
+ // Branches with two GPR sources.
+ case kCondEQ:
+ case kCondNE:
+ return (delay_slot.gpr_outs_mask_ & ((1u << lhs_reg_) | (1u << rhs_reg_))) == 0;
+
+ // Branches with one FPU condition code source.
+ case kCondF:
+ case kCondT:
+ return (delay_slot.cc_outs_mask_ & (1u << lhs_reg_)) == 0;
+
+ default:
+ // We don't support synthetic R2 branches (preceded with slt[u]) at this level
+ // (R2 doesn't have branches to compare 2 registers using <, <=, >=, >).
+ LOG(FATAL) << "Unexpected branch condition " << condition_;
+ UNREACHABLE();
+ }
+
+ // R6 unconditional branches.
+ case kR6UncondBranch:
+ case kR6LongUncondBranch:
+ // R6 calls.
+ case kR6Call:
+ case kR6LongCall:
+ // There are no delay slots.
+ return false;
+
+ // R6 conditional branches.
+ case kR6CondBranch:
+ case kR6LongCondBranch:
+ switch (condition_) {
+ // Branches with one FPU register source.
+ case kCondF:
+ case kCondT:
+ return (delay_slot.fpr_outs_mask_ & (1u << lhs_reg_)) == 0;
+ // Others have a forbidden slot instead of a delay slot.
+ default:
+ return false;
+ }
+
+ // Literals.
+ default:
+ LOG(FATAL) << "Unexpected branch type " << type_;
+ UNREACHABLE();
+ }
+}
+
+uint32_t MipsAssembler::Branch::GetDelayedInstruction() const {
+ return delayed_instruction_;
+}
+
+void MipsAssembler::Branch::SetDelayedInstruction(uint32_t instruction) {
+ CHECK_NE(instruction, kUnfilledDelaySlot);
+ CHECK_EQ(delayed_instruction_, kUnfilledDelaySlot);
+ delayed_instruction_ = instruction;
+}
+
+void MipsAssembler::Branch::DecrementLocations() {
+ // We first create a branch object, which gets its type and locations initialized,
+ // and then we check if the branch can actually have the preceding instruction moved
+ // into its delay slot. If it can, the branch locations need to be decremented.
+ //
+ // We could make the check before creating the branch object and avoid the location
+ // adjustment, but the check is cleaner when performed on an initialized branch
+ // object.
+ //
+ // If the branch is backwards (to a previously bound label), reducing the locations
+ // cannot cause a short branch to exceed its offset range because the offset reduces.
+ // And this is not at all a problem for a long branch backwards.
+ //
+ // If the branch is forward (not linked to any label yet), reducing the locations
+ // is harmless. The branch will be promoted to long if needed when the target is known.
+ CHECK_EQ(location_, old_location_);
+ CHECK_GE(old_location_, sizeof(uint32_t));
+ old_location_ -= sizeof(uint32_t);
+ location_ = old_location_;
+}
+
+void MipsAssembler::MoveInstructionToDelaySlot(Branch& branch) {
+ if (branch.CanHaveDelayedInstruction(delay_slot_)) {
+ // The last instruction cannot be used in a different delay slot,
+ // do not commit the label before it (if any).
+ DsFsmDropLabel();
+ // Remove the last emitted instruction.
+ size_t size = buffer_.Size();
+ CHECK_GE(size, sizeof(uint32_t));
+ size -= sizeof(uint32_t);
+ CHECK_EQ(buffer_.Load<uint32_t>(size), delay_slot_.instruction_);
+ buffer_.Resize(size);
+ // Attach it to the branch and adjust the branch locations.
+ branch.DecrementLocations();
+ branch.SetDelayedInstruction(delay_slot_.instruction_);
+ } else if (!reordering_ && branch.GetType() == Branch::kUncondBranch) {
+ // If reordering is disabled, prevent absorption of the target instruction.
+ branch.SetDelayedInstruction(Branch::kUnfillableDelaySlot);
+ }
+}
+
void MipsAssembler::Buncond(MipsLabel* label) {
uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false);
+ MoveInstructionToDelaySlot(branches_.back());
FinalizeLabeledBranch(label);
}
@@ -2027,12 +2446,14 @@
}
uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
branches_.emplace_back(IsR6(), buffer_.Size(), target, condition, lhs, rhs);
+ MoveInstructionToDelaySlot(branches_.back());
FinalizeLabeledBranch(label);
}
void MipsAssembler::Call(MipsLabel* label) {
uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ true);
+ MoveInstructionToDelaySlot(branches_.back());
FinalizeLabeledBranch(label);
}
@@ -2104,6 +2525,7 @@
uint32_t end = old_size;
for (size_t i = branch_count; i > 0; ) {
Branch& branch = branches_[--i];
+ CHECK_GE(end, branch.GetOldEndLocation());
uint32_t size = end - branch.GetOldEndLocation();
buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size);
end = branch.GetOldLocation();
@@ -2148,26 +2570,53 @@
BranchCondition condition = branch->GetCondition();
Register lhs = branch->GetLeftRegister();
Register rhs = branch->GetRightRegister();
+ uint32_t delayed_instruction = branch->GetDelayedInstruction();
switch (branch->GetType()) {
// R2 short branches.
case Branch::kUncondBranch:
+ if (delayed_instruction == Branch::kUnfillableDelaySlot) {
+ // The branch was created when reordering was disabled, do not absorb the target
+ // instruction.
+ delayed_instruction = 0; // NOP.
+ } else if (delayed_instruction == Branch::kUnfilledDelaySlot) {
+ // Try to absorb the target instruction into the delay slot.
+ delayed_instruction = 0; // NOP.
+ // Incrementing the signed 16-bit offset past the target instruction must not
+ // cause overflow into the negative subrange, check for the max offset.
+ if (offset != 0x7FFF) {
+ uint32_t target = branch->GetTarget();
+ if (std::binary_search(ds_fsm_target_pcs_.begin(), ds_fsm_target_pcs_.end(), target)) {
+ delayed_instruction = buffer_.Load<uint32_t>(target);
+ offset++;
+ }
+ }
+ }
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
B(offset);
- Nop(); // TODO: improve by filling the delay slot.
+ Emit(delayed_instruction);
break;
case Branch::kCondBranch:
+ DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot);
+ if (delayed_instruction == Branch::kUnfilledDelaySlot) {
+ delayed_instruction = 0; // NOP.
+ }
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
EmitBcondR2(condition, lhs, rhs, offset);
- Nop(); // TODO: improve by filling the delay slot.
+ Emit(delayed_instruction);
break;
case Branch::kCall:
+ DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot);
+ if (delayed_instruction == Branch::kUnfilledDelaySlot) {
+ delayed_instruction = 0; // NOP.
+ }
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Bal(offset);
- Nop(); // TODO: improve by filling the delay slot.
+ Emit(delayed_instruction);
break;
// R2 near literal.
case Branch::kLiteral:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Lw(lhs, rhs, offset);
break;
@@ -2192,6 +2641,12 @@
// For now simply use the stack for RA. This should be OK since for the
// vast majority of code a short PC-relative branch is sufficient.
// TODO: can this be improved?
+ // TODO: consider generation of a shorter sequence when we know that RA
+ // is explicitly preserved by the method entry/exit code.
+ if (delayed_instruction != Branch::kUnfilledDelaySlot &&
+ delayed_instruction != Branch::kUnfillableDelaySlot) {
+ Emit(delayed_instruction);
+ }
Push(RA);
Nal();
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -2204,6 +2659,10 @@
break;
case Branch::kLongCondBranch:
// The comment on case 'Branch::kLongUncondBranch' applies here as well.
+ DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot);
+ if (delayed_instruction != Branch::kUnfilledDelaySlot) {
+ Emit(delayed_instruction);
+ }
// Note: the opposite condition branch encodes 8 as the distance, which is equal to the
// number of instructions skipped:
// (PUSH(IncreaseFrameSize(ADDIU) + SW) + NAL + LUI + ORI + ADDU + LW + JR).
@@ -2219,6 +2678,10 @@
DecreaseFrameSize(kMipsWordSize);
break;
case Branch::kLongCall:
+ DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot);
+ if (delayed_instruction != Branch::kUnfilledDelaySlot) {
+ Emit(delayed_instruction);
+ }
Nal();
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Lui(AT, High16Bits(offset));
@@ -2230,6 +2693,7 @@
// R2 far literal.
case Branch::kFarLiteral:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
offset += (offset & 0x8000) << 1; // Account for sign extension in lw.
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Lui(AT, High16Bits(offset));
@@ -2239,33 +2703,48 @@
// R6 short branches.
case Branch::kR6UncondBranch:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Bc(offset);
break;
case Branch::kR6CondBranch:
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
EmitBcondR6(condition, lhs, rhs, offset);
- Nop(); // TODO: improve by filling the forbidden/delay slot.
+ DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot);
+ if (delayed_instruction != Branch::kUnfilledDelaySlot) {
+ Emit(delayed_instruction);
+ } else {
+ // TODO: improve by filling the forbidden slot (IFF this is
+ // a forbidden and not a delay slot).
+ Nop();
+ }
break;
case Branch::kR6Call:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Balc(offset);
break;
// R6 near literal.
case Branch::kR6Literal:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Lwpc(lhs, offset);
break;
// R6 long branches.
case Branch::kR6LongUncondBranch:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
offset += (offset & 0x8000) << 1; // Account for sign extension in jic.
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Auipc(AT, High16Bits(offset));
Jic(AT, Low16Bits(offset));
break;
case Branch::kR6LongCondBranch:
+ DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot);
+ if (delayed_instruction != Branch::kUnfilledDelaySlot) {
+ Emit(delayed_instruction);
+ }
EmitBcondR6(Branch::OppositeCondition(condition), lhs, rhs, 2);
offset += (offset & 0x8000) << 1; // Account for sign extension in jic.
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -2273,6 +2752,7 @@
Jic(AT, Low16Bits(offset));
break;
case Branch::kR6LongCall:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
offset += (offset & 0x8000) << 1; // Account for sign extension in jialc.
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Auipc(AT, High16Bits(offset));
@@ -2281,6 +2761,7 @@
// R6 far literal.
case Branch::kR6FarLiteral:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
offset += (offset & 0x8000) << 1; // Account for sign extension in lw.
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Auipc(AT, High16Bits(offset));
@@ -2331,12 +2812,60 @@
Bcond(label, kCondGTZ, rt);
}
+bool MipsAssembler::CanExchangeWithSlt(Register rs, Register rt) const {
+ // Returns true if the last instruction (described by delay_slot_) may be exchanged with
+ // an slt[u] AT, `rs`, `rt` emitted right after it. The instruction must have a non-zero
+ // recorded encoding, must not modify AT, `rs` or `rt` (slt[u] depends on `rs` and `rt`,
+ // and the following conditional branch depends on AT set by slt[u]) and must not
+ // depend on AT itself, because slt[u] changes AT.
+ return (delay_slot_.instruction_ != 0 &&
+ (delay_slot_.gpr_outs_mask_ & ((1u << AT) | (1u << rs) | (1u << rt))) == 0 &&
+ (delay_slot_.gpr_ins_mask_ & (1u << AT)) == 0);
+}
+
+void MipsAssembler::ExchangeWithSlt(const DelaySlot& forwarded_slot) {
+ // Exchange the last two 32-bit instructions in the assembler buffer.
+ size_t size = buffer_.Size();
+ CHECK_GE(size, 2 * sizeof(uint32_t));  // Need at least two instructions to swap.
+ size_t pos1 = size - 2 * sizeof(uint32_t);  // Offset of the second-to-last instruction.
+ size_t pos2 = size - sizeof(uint32_t);  // Offset of the last instruction.
+ uint32_t instr1 = buffer_.Load<uint32_t>(pos1);
+ uint32_t instr2 = buffer_.Load<uint32_t>(pos2);
+ CHECK_EQ(instr1, forwarded_slot.instruction_);  // Must match the caller's saved description.
+ CHECK_EQ(instr2, delay_slot_.instruction_);  // Must match the current description.
+ buffer_.Store<uint32_t>(pos1, instr2);
+ buffer_.Store<uint32_t>(pos2, instr1);
+ // Set the current delay slot information to that of the instruction that is
+ // now last in the buffer (the one described by `forwarded_slot`).
+ delay_slot_ = forwarded_slot;
+}
+
+void MipsAssembler::GenerateSltForCondBranch(bool unsigned_slt, Register rs, Register rt) {
+ // Emits slt[u] AT, rs, rt. If possible, exchanges it with the preceding instruction,
+ // so that the preceding instruction ends up last and can fill the branch delay slot.
+ DelaySlot forwarded_slot = delay_slot_;  // Snapshot taken before slt[u] is emitted.
+ bool exchange = CanExchangeWithSlt(rs, rt);
+ if (exchange) {
+ // The last instruction cannot be used in a different delay slot,
+ // do not commit the label before it (if any).
+ DsFsmDropLabel();
+ }
+ if (unsigned_slt) {
+ Sltu(AT, rs, rt);  // Unsigned comparison, result in AT.
+ } else {
+ Slt(AT, rs, rt);  // Signed comparison, result in AT.
+ }
+ if (exchange) {
+ ExchangeWithSlt(forwarded_slot);  // Buffer order becomes: slt[u], preceding instruction.
+ }
+}
+
void MipsAssembler::Blt(Register rs, Register rt, MipsLabel* label) {
if (IsR6()) {
Bcond(label, kCondLT, rs, rt);
} else if (!Branch::IsNop(kCondLT, rs, rt)) {
// Synthesize the instruction (not available on R2).
- Slt(AT, rs, rt);
+ GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt);
Bnez(AT, label);
}
}
@@ -2348,7 +2877,7 @@
B(label);
} else {
// Synthesize the instruction (not available on R2).
- Slt(AT, rs, rt);
+ GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt);
Beqz(AT, label);
}
}
@@ -2358,7 +2887,7 @@
Bcond(label, kCondLTU, rs, rt);
} else if (!Branch::IsNop(kCondLTU, rs, rt)) {
// Synthesize the instruction (not available on R2).
- Sltu(AT, rs, rt);
+ GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt);
Bnez(AT, label);
}
}
@@ -2370,7 +2899,7 @@
B(label);
} else {
// Synthesize the instruction (not available on R2).
- Sltu(AT, rs, rt);
+ GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt);
Beqz(AT, label);
}
}
@@ -2613,12 +3142,22 @@
LoadFromOffset(kLoadWord, RA, SP, stack_offset);
cfi_.Restore(DWARFReg(RA));
- // Decrease frame to required size.
- DecreaseFrameSize(frame_size);
-
- // Then jump to the return address.
- Jr(RA);
- Nop();
+ // Adjust the stack pointer in the delay slot if doing so doesn't break CFI.
+ bool exchange = IsInt<16>(static_cast<int32_t>(frame_size));
+ bool reordering = SetReorder(false);
+ if (exchange) {
+ // Jump to the return address.
+ Jr(RA);
+ // Decrease frame to required size.
+ DecreaseFrameSize(frame_size); // Single instruction in delay slot.
+ } else {
+ // Decrease frame to required size.
+ DecreaseFrameSize(frame_size);
+ // Jump to the return address.
+ Jr(RA);
+ Nop(); // In delay slot.
+ }
+ SetReorder(reordering);
// The CFI should be restored for any code that follows the exit block.
cfi_.RestoreState();
@@ -2963,7 +3502,7 @@
LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
base.AsCoreRegister(), offset.Int32Value());
Jalr(scratch.AsCoreRegister());
- Nop();
+ NopIfNoReordering();
// TODO: place reference map on call.
}
@@ -2975,7 +3514,7 @@
LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
scratch.AsCoreRegister(), offset.Int32Value());
Jalr(scratch.AsCoreRegister());
- Nop();
+ NopIfNoReordering();
// TODO: place reference map on call.
}
@@ -2998,9 +3537,6 @@
exception_blocks_.emplace_back(scratch, stack_adjust);
LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
S1, Thread::ExceptionOffset<kMipsPointerSize>().Int32Value());
- // TODO: on MIPS32R6 prefer Bnezc(scratch.AsCoreRegister(), slow.Entry());
- // as the NAL instruction (occurring in long R2 branches) may become deprecated.
- // For now use common for R2 and R6 instructions as this code must execute on both.
Bnez(scratch.AsCoreRegister(), exception_blocks_.back().Entry());
}
@@ -3017,7 +3553,7 @@
LoadFromOffset(kLoadWord, T9, S1,
QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pDeliverException).Int32Value());
Jr(T9);
- Nop();
+ NopIfNoReordering();
// Call never returns.
Break();
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 434ca67..d50c439 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -154,6 +154,9 @@
: Assembler(arena),
overwriting_(false),
overwrite_location_(0),
+ reordering_(true),
+ ds_fsm_state_(kExpectingLabel),
+ ds_fsm_target_pc_(0),
literals_(arena->Adapter(kArenaAllocAssembler)),
last_position_adjustment_(0),
last_old_position_(0),
@@ -163,6 +166,7 @@
}
size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); }
+ size_t CodePosition() OVERRIDE;
DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
virtual ~MipsAssembler() {
@@ -256,6 +260,11 @@
void Slti(Register rt, Register rs, uint16_t imm16);
void Sltiu(Register rt, Register rs, uint16_t imm16);
+ // Branches and jumps to immediate offsets/addresses do not take care of their
+ // delay/forbidden slots and generally should not be used directly. This applies
+ // to the following R2 and R6 branch/jump instructions with imm16, imm21, addr26
+ // offsets/addresses.
+ // Use branches/jumps to labels instead.
void B(uint16_t imm16);
void Bal(uint16_t imm16);
void Beq(Register rs, Register rt, uint16_t imm16);
@@ -272,9 +281,13 @@
void Bc1t(int cc, uint16_t imm16); // R2
void J(uint32_t addr26);
void Jal(uint32_t addr26);
+ // Jalr() and Jr() fill their delay slots when reordering is enabled.
+ // When reordering is disabled, the delay slots must be filled manually.
+ // You may use NopIfNoReordering() to fill them when reordering is disabled.
void Jalr(Register rd, Register rs);
void Jalr(Register rs);
void Jr(Register rs);
+ // Nal() does not fill its delay slot. It must be filled manually.
void Nal();
void Auipc(Register rs, uint16_t imm16); // R6
void Addiupc(Register rs, uint32_t imm19); // R6
@@ -403,6 +416,7 @@
void Break();
void Nop();
+ void NopIfNoReordering();
void Move(Register rd, Register rs);
void Clear(Register rd);
void Not(Register rd, Register rs);
@@ -414,7 +428,8 @@
void LoadSConst32(FRegister r, int32_t value, Register temp);
void Addiu32(Register rt, Register rs, int32_t value, Register rtmp = AT);
- // These will generate R2 branches or R6 branches as appropriate.
+ // These will generate R2 branches or R6 branches as appropriate and take care of
+ // the delay/forbidden slots.
void Bind(MipsLabel* label);
void B(MipsLabel* label);
void Bal(MipsLabel* label);
@@ -868,7 +883,51 @@
};
friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
+ // Enables or disables instruction reordering (IOW, automatic filling of delay slots)
+ // similarly to ".set reorder" / ".set noreorder" in traditional MIPS assembly.
+ // Returns the last state, which may be useful for temporary enabling/disabling of
+ // reordering.
+ bool SetReorder(bool enable);
+
private:
+ // Description of the last instruction in terms of input and output registers.
+ // Used to make the decision of moving the instruction into a delay slot.
+ struct DelaySlot {
+ DelaySlot();
+ // Encoded instruction that may be used to fill the delay slot or 0
+ // (0 conveniently represents NOP).
+ uint32_t instruction_;
+ // Mask of output GPRs for the instruction.
+ uint32_t gpr_outs_mask_;
+ // Mask of input GPRs for the instruction.
+ uint32_t gpr_ins_mask_;
+ // Mask of output FPRs for the instruction.
+ uint32_t fpr_outs_mask_;
+ // Mask of input FPRs for the instruction.
+ uint32_t fpr_ins_mask_;
+ // Mask of output FPU condition code flags for the instruction.
+ uint32_t cc_outs_mask_;
+ // Mask of input FPU condition code flags for the instruction.
+ uint32_t cc_ins_mask_;
+ // Branches never operate on the LO and HI registers, hence there's
+ // no mask for LO and HI.
+ };
+
+ // Delay slot finite state machine's (DS FSM's) state. The FSM state is updated
+ // upon every new instruction and label generated. The FSM detects instructions
+ // suitable for delay slots and immediately preceded with labels. These are target
+ // instructions for branches. If an unconditional R2 branch does not get its delay
+ // slot filled with the immediately preceding instruction, it may instead get the
+ // slot filled with the target instruction (the branch will need its offset
+ // incremented past the target instruction). We call this "absorption". The FSM
+ // records PCs of the target instructions suitable for this optimization.
+ enum DsFsmState {
+ kExpectingLabel,
+ kExpectingInstruction,
+ kExpectingCommit
+ };
+ friend std::ostream& operator<<(std::ostream& os, const DsFsmState& rhs);
+
class Branch {
public:
enum Type {
@@ -910,6 +969,17 @@
static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_
static constexpr int32_t kMaxBranchLength = 32;
static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t);
+ // The following two instruction encodings can never legally occur in branch delay
+ // slots and are used as markers.
+ //
+ // kUnfilledDelaySlot means that the branch may use either the preceding or the target
+ // instruction to fill its delay slot (the latter is only possible with unconditional
+ // R2 branches and is termed here as "absorption").
+ static constexpr uint32_t kUnfilledDelaySlot = 0x10000000; // beq zero, zero, 0.
+ // kUnfillableDelaySlot means that the branch cannot use an instruction (other than NOP)
+ // to fill its delay slot. This is only used for unconditional R2 branches to prevent
+ // absorption of the target instruction when reordering is disabled.
+ static constexpr uint32_t kUnfillableDelaySlot = 0x13FF0000; // beq ra, ra, 0.
struct BranchInfo {
// Branch length as a number of 4-byte-long instructions.
@@ -958,6 +1028,8 @@
uint32_t GetTarget() const;
uint32_t GetLocation() const;
uint32_t GetOldLocation() const;
+ uint32_t GetPrecedingInstructionLength(Type type) const;
+ uint32_t GetPrecedingInstructionSize(Type type) const;
uint32_t GetLength() const;
uint32_t GetOldLength() const;
uint32_t GetSize() const;
@@ -967,6 +1039,12 @@
bool IsLong() const;
bool IsResolved() const;
+ // Various helpers for branch delay slot management.
+ bool CanHaveDelayedInstruction(const DelaySlot& delay_slot) const;
+ void SetDelayedInstruction(uint32_t instruction);
+ uint32_t GetDelayedInstruction() const;
+ void DecrementLocations();
+
// Returns the bit size of the signed offset that the branch instruction can handle.
OffsetBits GetOffsetSize() const;
@@ -1031,27 +1109,34 @@
// Helper for the above.
void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
- uint32_t old_location_; // Offset into assembler buffer in bytes.
- uint32_t location_; // Offset into assembler buffer in bytes.
- uint32_t target_; // Offset into assembler buffer in bytes.
+ uint32_t old_location_; // Offset into assembler buffer in bytes.
+ uint32_t location_; // Offset into assembler buffer in bytes.
+ uint32_t target_; // Offset into assembler buffer in bytes.
- uint32_t lhs_reg_; // Left-hand side register in conditional branches or
- // indirect call register.
- uint32_t rhs_reg_; // Right-hand side register in conditional branches.
- BranchCondition condition_; // Condition for conditional branches.
+ uint32_t lhs_reg_; // Left-hand side register in conditional branches or
+ // FPU condition code. Destination register in literals.
+ uint32_t rhs_reg_; // Right-hand side register in conditional branches.
+ // Base register in literals (ZERO on R6).
+ BranchCondition condition_; // Condition for conditional branches.
- Type type_; // Current type of the branch.
- Type old_type_; // Initial type of the branch.
+ Type type_; // Current type of the branch.
+ Type old_type_; // Initial type of the branch.
+
+ uint32_t delayed_instruction_; // Encoded instruction for the delay slot or
+ // kUnfilledDelaySlot if none but fillable or
+ // kUnfillableDelaySlot if none and unfillable
+ // (the latter is only used for unconditional R2
+ // branches).
};
friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
- void EmitR(int opcode, Register rs, Register rt, Register rd, int shamt, int funct);
- void EmitI(int opcode, Register rs, Register rt, uint16_t imm);
- void EmitI21(int opcode, Register rs, uint32_t imm21);
- void EmitI26(int opcode, uint32_t imm26);
- void EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct);
- void EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm);
+ uint32_t EmitR(int opcode, Register rs, Register rt, Register rd, int shamt, int funct);
+ uint32_t EmitI(int opcode, Register rs, Register rt, uint16_t imm);
+ uint32_t EmitI21(int opcode, Register rs, uint32_t imm21);
+ uint32_t EmitI26(int opcode, uint32_t imm26);
+ uint32_t EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct);
+ uint32_t EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm);
void EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16);
void EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21);
@@ -1060,6 +1145,33 @@
void Call(MipsLabel* label);
void FinalizeLabeledBranch(MipsLabel* label);
+ // Various helpers for branch delay slot management.
+ void DsFsmInstr(uint32_t instruction,
+ uint32_t gpr_outs_mask,
+ uint32_t gpr_ins_mask,
+ uint32_t fpr_outs_mask,
+ uint32_t fpr_ins_mask,
+ uint32_t cc_outs_mask,
+ uint32_t cc_ins_mask);
+ void DsFsmInstrNop(uint32_t instruction);
+ void DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2);
+ void DsFsmInstrRrrr(uint32_t instruction, Register in1_out, Register in2, Register in3);
+ void DsFsmInstrFff(uint32_t instruction, FRegister out, FRegister in1, FRegister in2);
+ void DsFsmInstrFfff(uint32_t instruction, FRegister in1_out, FRegister in2, FRegister in3);
+ void DsFsmInstrRf(uint32_t instruction, Register out, FRegister in);
+ void DsFsmInstrFr(uint32_t instruction, FRegister out, Register in);
+ void DsFsmInstrFR(uint32_t instruction, FRegister in1, Register in2);
+ void DsFsmInstrCff(uint32_t instruction, int cc_out, FRegister in1, FRegister in2);
+ void DsFsmInstrRrrc(uint32_t instruction, Register in1_out, Register in2, int cc_in);
+ void DsFsmInstrFffc(uint32_t instruction, FRegister in1_out, FRegister in2, int cc_in);
+ void DsFsmLabel();
+ void DsFsmCommitLabel();
+ void DsFsmDropLabel();
+ void MoveInstructionToDelaySlot(Branch& branch);
+ bool CanExchangeWithSlt(Register rs, Register rt) const;
+ void ExchangeWithSlt(const DelaySlot& forwarded_slot);
+ void GenerateSltForCondBranch(bool unsigned_slt, Register rs, Register rt);
+
Branch* GetBranch(uint32_t branch_id);
const Branch* GetBranch(uint32_t branch_id) const;
uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const;
@@ -1100,6 +1212,17 @@
// The current overwrite location.
uint32_t overwrite_location_;
+ // Whether instruction reordering (IOW, automatic filling of delay slots) is enabled.
+ bool reordering_;
+ // Information about the last instruction that may be used to fill a branch delay slot.
+ DelaySlot delay_slot_;
+ // Delay slot FSM state.
+ DsFsmState ds_fsm_state_;
+ // PC of the current labeled target instruction.
+ uint32_t ds_fsm_target_pc_;
+ // PCs of labeled target instructions.
+ std::vector<uint32_t> ds_fsm_target_pcs_;
+
// Use std::deque<> for literal labels to allow insertions at the end
// without invalidating pointers and references to existing elements.
ArenaDeque<Literal> literals_;
@@ -1109,7 +1232,7 @@
// that PC (from NAL) points to.
MipsLabel pc_rel_base_label_;
- // Data for AdjustedPosition(), see the description there.
+ // Data for GetAdjustedPosition(), see the description there.
uint32_t last_position_adjustment_;
uint32_t last_old_position_;
uint32_t last_branch_id_;
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index 49ef272..fabb096 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -673,6 +673,144 @@
// BRANCHES //
//////////////
+TEST_F(AssemblerMIPS32r6Test, ImpossibleReordering) {
+ mips::MipsLabel label;
+ __ SetReorder(true);
+ __ Bind(&label);
+
+ __ CmpLtD(mips::F0, mips::F2, mips::F4);
+ __ Bc1nez(mips::F0, &label); // F0 dependency.
+
+ __ MulD(mips::F10, mips::F2, mips::F4);
+ __ Bc1eqz(mips::F10, &label); // F10 dependency.
+
+ std::string expected =
+ ".set noreorder\n"
+ "1:\n"
+
+ "cmp.lt.d $f0, $f2, $f4\n"
+ "bc1nez $f0, 1b\n"
+ "nop\n"
+
+ "mul.d $f10, $f2, $f4\n"
+ "bc1eqz $f10, 1b\n"
+ "nop\n";
+ DriverStr(expected, "ImpossibleReordering");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Reordering) {
+ mips::MipsLabel label;
+ __ SetReorder(true);
+ __ Bind(&label);
+
+ __ CmpLtD(mips::F0, mips::F2, mips::F4);
+ __ Bc1nez(mips::F2, &label);  // Tests F2, not the F0 result: CmpLtD goes after the branch.
+
+ __ MulD(mips::F0, mips::F2, mips::F4);
+ __ Bc1eqz(mips::F4, &label);  // Tests F4, not the F0 result: MulD goes after the branch.
+
+ std::string expected =
+ ".set noreorder\n"
+ "1:\n"
+
+ "bc1nez $f2, 1b\n"
+ "cmp.lt.d $f0, $f2, $f4\n"
+
+ "bc1eqz $f4, 1b\n"
+ "mul.d $f0, $f2, $f4\n";
+ DriverStr(expected, "Reordering");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SetReorder) {
+ mips::MipsLabel label1, label2, label3, label4;
+
+ __ SetReorder(true);  // Reordering on: ADDU is expected after the branch.
+ __ Bind(&label1);
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ Bc1nez(mips::F0, &label1);
+
+ __ SetReorder(false);  // Reordering off: ADDU stays put and a NOP follows the branch.
+ __ Bind(&label2);
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ Bc1nez(mips::F0, &label2);
+
+ __ SetReorder(true);  // Same as the first group, but with BC1EQZ.
+ __ Bind(&label3);
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ Bc1eqz(mips::F0, &label3);
+
+ __ SetReorder(false);  // Same as the second group, but with BC1EQZ.
+ __ Bind(&label4);
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ Bc1eqz(mips::F0, &label4);
+
+ std::string expected =
+ ".set noreorder\n"
+ "1:\n"
+ "bc1nez $f0, 1b\n"
+ "addu $t0, $t1, $t2\n"
+
+ "2:\n"
+ "addu $t0, $t1, $t2\n"
+ "bc1nez $f0, 2b\n"
+ "nop\n"
+
+ "3:\n"
+ "bc1eqz $f0, 3b\n"
+ "addu $t0, $t1, $t2\n"
+
+ "4:\n"
+ "addu $t0, $t1, $t2\n"
+ "bc1eqz $f0, 4b\n"
+ "nop\n";
+ DriverStr(expected, "SetReorder");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) {  // Long R6 branches with reordering on.
+  mips::MipsLabel label;
+  __ SetReorder(true);
+  __ Subu(mips::T0, mips::T1, mips::T2);
+  __ Bc1nez(mips::F0, &label);  // Forward branch; expected below in its long form.
+  constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr uint32_t kAdduCount2 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Subu(mips::T0, mips::T1, mips::T2);
+  __ Bc1eqz(mips::F0, &label);  // Backward branch; expected below in its long form.
+
+  uint32_t offset_forward = 2 + kAdduCount1;  // 2: account for auipc and jic.
+  offset_forward <<= 2;
+  offset_forward += (offset_forward & 0x8000) << 1;  // Account for sign extension in jic.
+
+  uint32_t offset_back = -(kAdduCount2 + 2);  // 2: account for subu and bc1nez.
+  offset_back <<= 2;
+  offset_back += (offset_back & 0x8000) << 1;  // Account for sign extension in jic.
+
+  std::ostringstream oss;
+  oss <<
+      ".set noreorder\n"
+      "subu $t0, $t1, $t2\n"
+      "bc1eqz $f0, 1f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+      "1:\n" <<
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+      "2:\n" <<
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+      "subu $t0, $t1, $t2\n"
+      "bc1nez $f0, 3f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+      "3:\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongBranchReorder");  // Name matches this test (was "LongBeqc").
+}
+
// TODO: MipsAssembler::Addiupc
// MipsAssembler::Bc
// MipsAssembler::Jic
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 50a8dc2..708bc3d 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -2009,14 +2009,17 @@
}
TEST_F(AssemblerMIPSTest, Beq) {
+ __ SetReorder(false);
BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq");
}
TEST_F(AssemblerMIPSTest, Bne) {
+ __ SetReorder(false);
BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne");
}
TEST_F(AssemblerMIPSTest, Beqz) {
+ __ SetReorder(false);
mips::MipsLabel label;
__ Beqz(mips::A0, &label);
constexpr size_t kAdduCount1 = 63;
@@ -2043,6 +2046,7 @@
}
TEST_F(AssemblerMIPSTest, Bnez) {
+ __ SetReorder(false);
mips::MipsLabel label;
__ Bnez(mips::A0, &label);
constexpr size_t kAdduCount1 = 63;
@@ -2069,22 +2073,27 @@
}
TEST_F(AssemblerMIPSTest, Bltz) {
+ __ SetReorder(false);
BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz");
}
TEST_F(AssemblerMIPSTest, Bgez) {
+ __ SetReorder(false);
BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez");
}
TEST_F(AssemblerMIPSTest, Blez) {
+ __ SetReorder(false);
BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez");
}
TEST_F(AssemblerMIPSTest, Bgtz) {
+ __ SetReorder(false);
BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz");
}
TEST_F(AssemblerMIPSTest, Blt) {
+ __ SetReorder(false);
mips::MipsLabel label;
__ Blt(mips::A0, mips::A1, &label);
constexpr size_t kAdduCount1 = 63;
@@ -2113,6 +2122,7 @@
}
TEST_F(AssemblerMIPSTest, Bge) {
+ __ SetReorder(false);
mips::MipsLabel label;
__ Bge(mips::A0, mips::A1, &label);
constexpr size_t kAdduCount1 = 63;
@@ -2141,6 +2151,7 @@
}
TEST_F(AssemblerMIPSTest, Bltu) {
+ __ SetReorder(false);
mips::MipsLabel label;
__ Bltu(mips::A0, mips::A1, &label);
constexpr size_t kAdduCount1 = 63;
@@ -2169,6 +2180,7 @@
}
TEST_F(AssemblerMIPSTest, Bgeu) {
+ __ SetReorder(false);
mips::MipsLabel label;
__ Bgeu(mips::A0, mips::A1, &label);
constexpr size_t kAdduCount1 = 63;
@@ -2197,6 +2209,7 @@
}
TEST_F(AssemblerMIPSTest, Bc1f) {
+ __ SetReorder(false);
mips::MipsLabel label;
__ Bc1f(0, &label);
constexpr size_t kAdduCount1 = 63;
@@ -2223,6 +2236,7 @@
}
TEST_F(AssemblerMIPSTest, Bc1t) {
+ __ SetReorder(false);
mips::MipsLabel label;
__ Bc1t(0, &label);
constexpr size_t kAdduCount1 = 63;
@@ -2331,6 +2345,410 @@
DriverStr(expected, "LoadNearestFarLiteral");
}
+TEST_F(AssemblerMIPSTest, ImpossibleReordering) {
+ mips::MipsLabel label1, label2;
+ __ SetReorder(true);
+
+ __ B(&label1); // No preceding or target instruction for the delay slot.
+
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ Bind(&label1);
+ __ B(&label1); // The preceding label prevents moving Addu into the delay slot.
+ __ B(&label1); // No preceding or target instruction for the delay slot.
+
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ Beqz(mips::T0, &label1); // T0 dependency.
+
+ __ Or(mips::T1, mips::T2, mips::T3);
+ __ Bne(mips::T2, mips::T1, &label1); // T1 dependency.
+
+ __ And(mips::T0, mips::T1, mips::T2);
+ __ Blt(mips::T1, mips::T0, &label1); // T0 dependency.
+
+ __ Xor(mips::AT, mips::T0, mips::T1);
+ __ Bge(mips::T1, mips::T0, &label1); // AT dependency.
+
+ __ Subu(mips::T0, mips::T1, mips::AT);
+ __ Bltu(mips::T1, mips::T0, &label1); // AT dependency.
+
+ __ ColtS(1, mips::F2, mips::F4);
+ __ Bc1t(1, &label1); // cc1 dependency.
+
+ __ Move(mips::T0, mips::RA);
+ __ Bal(&label1); // RA dependency.
+
+ __ Lw(mips::RA, mips::T0, 0);
+ __ Bal(&label1); // RA dependency.
+
+ __ LlR2(mips::T9, mips::T0, 0);
+ __ Jalr(mips::T9); // T9 dependency.
+
+ __ Sw(mips::RA, mips::T0, 0);
+ __ Jalr(mips::T9); // RA dependency.
+
+ __ Lw(mips::T1, mips::T0, 0);
+ __ Jalr(mips::T1, mips::T9); // T1 dependency.
+
+ __ ScR2(mips::T9, mips::T0, 0);
+ __ Jr(mips::T9); // T9 dependency.
+
+ __ Bind(&label2);
+
+ __ Bnez(mips::T0, &label2); // No preceding instruction for the delay slot.
+
+ __ Bgeu(mips::T1, mips::T0, &label2); // No preceding instruction for the delay slot.
+
+ __ Bc1f(2, &label2); // No preceding instruction for the delay slot.
+
+ __ Bal(&label2); // No preceding instruction for the delay slot.
+
+ __ Jalr(mips::T9); // No preceding instruction for the delay slot.
+
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ CodePosition(); // Drops the delay slot candidate (the last instruction).
+ __ Beq(mips::T1, mips::T2, &label2); // No preceding or target instruction for the delay slot.
+
+ std::string expected =
+ ".set noreorder\n"
+ "b 1f\n"
+ "nop\n"
+
+ "addu $t0, $t1, $t2\n"
+ "1:\n"
+ "b 1b\n"
+ "nop\n"
+ "b 1b\n"
+ "nop\n"
+
+ "addu $t0, $t1, $t2\n"
+ "beq $zero, $t0, 1b\n"
+ "nop\n"
+
+ "or $t1, $t2, $t3\n"
+ "bne $t2, $t1, 1b\n"
+ "nop\n"
+
+ "and $t0, $t1, $t2\n"
+ "slt $at, $t1, $t0\n"
+ "bne $zero, $at, 1b\n"
+ "nop\n"
+
+ "xor $at, $t0, $t1\n"
+ "slt $at, $t1, $t0\n"
+ "beq $zero, $at, 1b\n"
+ "nop\n"
+
+ "subu $t0, $t1, $at\n"
+ "sltu $at, $t1, $t0\n"
+ "bne $zero, $at, 1b\n"
+ "nop\n"
+
+ "c.olt.s $fcc1, $f2, $f4\n"
+ "bc1t $fcc1, 1b\n"
+ "nop\n"
+
+ "or $t0, $ra, $zero\n"
+ "bal 1b\n"
+ "nop\n"
+
+ "lw $ra, 0($t0)\n"
+ "bal 1b\n"
+ "nop\n"
+
+ "ll $t9, 0($t0)\n"
+ "jalr $t9\n"
+ "nop\n"
+
+ "sw $ra, 0($t0)\n"
+ "jalr $t9\n"
+ "nop\n"
+
+ "lw $t1, 0($t0)\n"
+ "jalr $t1, $t9\n"
+ "nop\n"
+
+ "sc $t9, 0($t0)\n"
+ "jalr $zero, $t9\n"
+ "nop\n"
+
+ "2:\n"
+
+ "bne $zero, $t0, 2b\n"
+ "nop\n"
+
+ "sltu $at, $t1, $t0\n"
+ "beq $zero, $at, 2b\n"
+ "nop\n"
+
+ "bc1f $fcc2, 2b\n"
+ "nop\n"
+
+ "bal 2b\n"
+ "nop\n"
+
+ "jalr $t9\n"
+ "nop\n"
+
+ "addu $t0, $t1, $t2\n"
+ "beq $t1, $t2, 2b\n"
+ "nop\n";
+ DriverStr(expected, "ImpossibleReordering");
+}
+
+TEST_F(AssemblerMIPSTest, Reordering) {
+ mips::MipsLabel label1, label2;
+ __ SetReorder(true);
+
+ __ Bind(&label1);
+ __ Bind(&label2);
+
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ Beqz(mips::T1, &label1);
+
+ __ Or(mips::T1, mips::T2, mips::T3);
+ __ Bne(mips::T2, mips::T3, &label1);
+
+ __ And(mips::T0, mips::T1, mips::T2);
+ __ Blt(mips::T1, mips::T2, &label1);
+
+ __ Xor(mips::T2, mips::T0, mips::T1);
+ __ Bge(mips::T1, mips::T0, &label1);
+
+ __ Subu(mips::T2, mips::T1, mips::T0);
+ __ Bltu(mips::T1, mips::T0, &label1);
+
+ __ ColtS(0, mips::F2, mips::F4);
+ __ Bc1t(1, &label1);
+
+ __ Move(mips::T0, mips::T1);
+ __ Bal(&label1);
+
+ __ LlR2(mips::T1, mips::T0, 0);
+ __ Jalr(mips::T9);
+
+ __ ScR2(mips::T1, mips::T0, 0);
+ __ Jr(mips::T9);
+
+ std::string expected =
+ ".set noreorder\n"
+ "1:\n"
+
+ "beq $zero, $t1, 1b\n"
+ "addu $t0, $t1, $t2\n"
+
+ "bne $t2, $t3, 1b\n"
+ "or $t1, $t2, $t3\n"
+
+ "slt $at, $t1, $t2\n"
+ "bne $zero, $at, 1b\n"
+ "and $t0, $t1, $t2\n"
+
+ "slt $at, $t1, $t0\n"
+ "beq $zero, $at, 1b\n"
+ "xor $t2, $t0, $t1\n"
+
+ "sltu $at, $t1, $t0\n"
+ "bne $zero, $at, 1b\n"
+ "subu $t2, $t1, $t0\n"
+
+ "bc1t $fcc1, 1b\n"
+ "c.olt.s $fcc0, $f2, $f4\n"
+
+ "bal 1b\n"
+ "or $t0, $t1, $zero\n"
+
+ "jalr $t9\n"
+ "ll $t1, 0($t0)\n"
+
+ "jalr $zero, $t9\n"
+ "sc $t1, 0($t0)\n";
+ DriverStr(expected, "Reordering");
+}
+
+TEST_F(AssemblerMIPSTest, AbsorbTargetInstruction) {
+ mips::MipsLabel label1, label2, label3, label4, label5, label6;
+ __ SetReorder(true);
+
+ __ B(&label1);
+ __ Bind(&label1);
+ __ Addu(mips::T0, mips::T1, mips::T2);
+
+ __ Bind(&label2);
+ __ Xor(mips::T0, mips::T1, mips::T2);
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ Bind(&label3); // Prevents reordering ADDU above with B below.
+ __ B(&label2);
+
+ __ B(&label4);
+ __ Bind(&label4);
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ CodePosition(); // Prevents absorbing ADDU above.
+
+ __ B(&label5);
+ __ Bind(&label5);
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ Bind(&label6);
+ __ CodePosition(); // Even across Bind(), CodePosition() prevents absorbing the ADDU above.
+
+ std::string expected =
+ ".set noreorder\n"
+ "b 1f\n"
+ "addu $t0, $t1, $t2\n"
+ "addu $t0, $t1, $t2\n"
+ "1:\n"
+
+ "xor $t0, $t1, $t2\n"
+ "2:\n"
+ "addu $t0, $t1, $t2\n"
+ "b 2b\n"
+ "xor $t0, $t1, $t2\n"
+
+ "b 4f\n"
+ "nop\n"
+ "4:\n"
+ "addu $t0, $t1, $t2\n"
+
+ "b 5f\n"
+ "nop\n"
+ "5:\n"
+ "addu $t0, $t1, $t2\n";
+ DriverStr(expected, "AbsorbTargetInstruction");
+}
+
+TEST_F(AssemblerMIPSTest, SetReorder) {
+ mips::MipsLabel label1, label2, label3, label4, label5, label6;
+
+ __ SetReorder(true);
+ __ Bind(&label1);
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ B(&label1);
+ __ B(&label5);
+ __ B(&label6);
+
+ __ SetReorder(false);
+ __ Bind(&label2);
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ B(&label2);
+ __ B(&label5);
+ __ B(&label6);
+
+ __ SetReorder(true);
+ __ Bind(&label3);
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ B(&label3);
+ __ B(&label5);
+ __ B(&label6);
+
+ __ SetReorder(false);
+ __ Bind(&label4);
+ __ Addu(mips::T0, mips::T1, mips::T2);
+ __ B(&label4);
+ __ B(&label5);
+ __ B(&label6);
+
+ __ SetReorder(true);
+ __ Bind(&label5);
+ __ Subu(mips::T0, mips::T1, mips::T2);
+
+ __ SetReorder(false);
+ __ Bind(&label6);
+ __ Xor(mips::T0, mips::T1, mips::T2);
+
+ std::string expected =
+ ".set noreorder\n"
+ "1:\n"
+ "b 1b\n"
+ "addu $t0, $t1, $t2\n"
+ "b 55f\n"
+ "subu $t0, $t1, $t2\n"
+ "b 6f\n"
+ "nop\n"
+
+ "2:\n"
+ "addu $t0, $t1, $t2\n"
+ "b 2b\n"
+ "nop\n"
+ "b 5f\n"
+ "nop\n"
+ "b 6f\n"
+ "nop\n"
+
+ "3:\n"
+ "b 3b\n"
+ "addu $t0, $t1, $t2\n"
+ "b 55f\n"
+ "subu $t0, $t1, $t2\n"
+ "b 6f\n"
+ "nop\n"
+
+ "4:\n"
+ "addu $t0, $t1, $t2\n"
+ "b 4b\n"
+ "nop\n"
+ "b 5f\n"
+ "nop\n"
+ "b 6f\n"
+ "nop\n"
+
+ "5:\n"
+ "subu $t0, $t1, $t2\n"
+ "55:\n"
+ "6:\n"
+ "xor $t0, $t1, $t2\n";
+ DriverStr(expected, "SetReorder");
+}
+
+TEST_F(AssemblerMIPSTest, LongBranchReorder) {  // Long R2 branches with reordering on.
+  mips::MipsLabel label;
+  __ SetReorder(true);
+  __ Subu(mips::T0, mips::T1, mips::T2);
+  __ B(&label);  // Forward branch; expected below in its long form.
+  constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr uint32_t kAdduCount2 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Subu(mips::T0, mips::T1, mips::T2);
+  __ B(&label);  // Backward branch; expected below in its long form.
+
+  // Forward: kAdduCount1 ADDUs plus 5 extra instructions: ori, addu, lw, jalr, addiu.
+  uint32_t offset_forward = (kAdduCount1 + 5) * sizeof(uint32_t);
+  // Backward: kAdduCount2 ADDUs plus 5 extra instructions: subu, addiu, sw, nal, lui.
+  uint32_t offset_back = -(kAdduCount2 + 5) * sizeof(uint32_t);
+
+  std::ostringstream oss;
+  oss <<
+      ".set noreorder\n"
+      "subu $t0, $t1, $t2\n"
+      "addiu $sp, $sp, -4\n"
+      "sw $ra, 0($sp)\n"
+      "bltzal $zero, .+4\n"
+      "lui $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+      "ori $at, $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+      "addu $at, $at, $ra\n"
+      "lw $ra, 0($sp)\n"
+      "jalr $zero, $at\n"
+      "addiu $sp, $sp, 4\n" <<
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+      "subu $t0, $t1, $t2\n"
+      "addiu $sp, $sp, -4\n"
+      "sw $ra, 0($sp)\n"
+      "bltzal $zero, .+4\n"
+      "lui $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "ori $at, $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+      "addu $at, $at, $ra\n"
+      "lw $ra, 0($sp)\n"
+      "jalr $zero, $at\n"
+      "addiu $sp, $sp, 4\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongBranchReorder");
+}
+
#undef __
} // namespace art