diff options
Diffstat (limited to 'compiler/utils/mips64')
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64.cc | 1119 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64.h | 413 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64_test.cc | 438 |
3 files changed, 1680 insertions, 290 deletions
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index ba2525e555..cfd8421e93 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -19,15 +19,73 @@ #include "base/bit_utils.h" #include "base/casts.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" #include "memory_region.h" #include "thread.h" namespace art { namespace mips64 { +void Mips64Assembler::FinalizeCode() { + for (auto& exception_block : exception_blocks_) { + EmitExceptionPoll(&exception_block); + } + PromoteBranches(); +} + +void Mips64Assembler::FinalizeInstructions(const MemoryRegion& region) { + EmitBranches(); + Assembler::FinalizeInstructions(region); + PatchCFI(); +} + +void Mips64Assembler::PatchCFI() { + if (cfi().NumberOfDelayedAdvancePCs() == 0u) { + return; + } + + typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; + const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); + const std::vector<uint8_t>& old_stream = data.first; + const std::vector<DelayedAdvancePC>& advances = data.second; + + // Refill our data buffer with patched opcodes. + cfi().ReserveCFIStream(old_stream.size() + advances.size() + 16); + size_t stream_pos = 0; + for (const DelayedAdvancePC& advance : advances) { + DCHECK_GE(advance.stream_pos, stream_pos); + // Copy old data up to the point where advance was issued. + cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos); + stream_pos = advance.stream_pos; + // Insert the advance command with its final offset. + size_t final_pc = GetAdjustedPosition(advance.pc); + cfi().AdvancePC(final_pc); + } + // Copy the final segment if any. + cfi().AppendRawData(old_stream, stream_pos, old_stream.size()); +} + +void Mips64Assembler::EmitBranches() { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (branch placeholders) in the buffer. + overwriting_ = true; + for (auto& branch : branches_) { + EmitBranch(&branch); + } + overwriting_ = false; +} + void Mips64Assembler::Emit(uint32_t value) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - buffer_.Emit<uint32_t>(value); + if (overwriting_) { + // Branches to labels are emitted into their placeholders here. + buffer_.Store<uint32_t>(overwrite_location_, value); + overwrite_location_ += sizeof(uint32_t); + } else { + // Other instructions are simply appended at the end here. + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + buffer_.Emit<uint32_t>(value); + } } void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, @@ -82,15 +140,16 @@ void Mips64Assembler::EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t void Mips64Assembler::EmitI21(int opcode, GpuRegister rs, uint32_t imm21) { CHECK_NE(rs, kNoGpuRegister); + CHECK(IsUint<21>(imm21)) << imm21; uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | static_cast<uint32_t>(rs) << kRsShift | - (imm21 & 0x1FFFFF); + imm21; Emit(encoding); } -void Mips64Assembler::EmitJ(int opcode, uint32_t addr26) { - uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | - (addr26 & 0x3FFFFFF); +void Mips64Assembler::EmitI26(int opcode, uint32_t imm26) { + CHECK(IsUint<26>(imm26)) << imm26; + uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26; Emit(encoding); } @@ -428,26 +487,6 @@ void Mips64Assembler::Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0xb, rs, rt, imm16); } -void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) { - EmitI(0x4, rs, rt, imm16); - Nop(); -} - -void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) { - EmitI(0x5, rs, rt, imm16); - Nop(); -} - -void Mips64Assembler::J(uint32_t addr26) { - EmitJ(0x2, addr26); - Nop(); -} - -void Mips64Assembler::Jal(uint32_t addr26) { - EmitJ(0x3, addr26); - Nop(); -} - void Mips64Assembler::Seleqz(GpuRegister rd, GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, rd, 0, 0x35); } @@ -474,7 +513,6 @@ void Mips64Assembler::Dclo(GpuRegister rd, GpuRegister rs) { void Mips64Assembler::Jalr(GpuRegister rd, GpuRegister rs) { EmitR(0, rs, static_cast<GpuRegister>(0), rd, 0, 0x09); - Nop(); } void Mips64Assembler::Jalr(GpuRegister rs) { @@ -489,6 +527,15 @@ void Mips64Assembler::Auipc(GpuRegister rs, uint16_t imm16) { EmitI(0x3B, rs, static_cast<GpuRegister>(0x1E), imm16); } +void Mips64Assembler::Addiupc(GpuRegister rs, uint32_t imm19) { + CHECK(IsUint<19>(imm19)) << imm19; + EmitI21(0x3B, rs, imm19); +} + +void Mips64Assembler::Bc(uint32_t imm26) { + EmitI26(0x32, imm26); +} + void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) { EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16); } @@ -549,14 +596,14 @@ void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x8, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16); + EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16); } void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x18, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16); + EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16); } void Mips64Assembler::Beqzc(GpuRegister rs, uint32_t imm21) { @@ -569,6 +616,81 @@ void Mips64Assembler::Bnezc(GpuRegister rs, uint32_t imm21) { EmitI21(0x3E, rs, imm21); } +void Mips64Assembler::Bc1eqz(FpuRegister ft, uint16_t imm16) { + EmitFI(0x11, 0x9, ft, imm16); +} + +void Mips64Assembler::Bc1nez(FpuRegister ft, uint16_t imm16) { + EmitFI(0x11, 0xD, ft, imm16); +} + +void Mips64Assembler::EmitBcondc(BranchCondition cond, + GpuRegister rs, + GpuRegister rt, + uint32_t imm16_21) { + switch (cond) { + case kCondLT: + Bltc(rs, rt, imm16_21); + break; + case kCondGE: + Bgec(rs, rt, imm16_21); + break; + case kCondLE: + Bgec(rt, rs, imm16_21); + break; + case kCondGT: + Bltc(rt, rs, imm16_21); + break; + case kCondLTZ: + CHECK_EQ(rt, ZERO); + Bltzc(rs, imm16_21); + break; + case kCondGEZ: + CHECK_EQ(rt, ZERO); + Bgezc(rs, imm16_21); + break; + case kCondLEZ: + CHECK_EQ(rt, ZERO); + Blezc(rs, imm16_21); + break; + case kCondGTZ: + CHECK_EQ(rt, ZERO); + Bgtzc(rs, imm16_21); + break; + case kCondEQ: + Beqc(rs, rt, imm16_21); + break; + case kCondNE: + Bnec(rs, rt, imm16_21); + break; + case kCondEQZ: + CHECK_EQ(rt, ZERO); + Beqzc(rs, imm16_21); + break; + case kCondNEZ: + CHECK_EQ(rt, ZERO); + Bnezc(rs, imm16_21); + break; + case kCondLTU: + Bltuc(rs, rt, imm16_21); + break; + case kCondGEU: + Bgeuc(rs, rt, imm16_21); + break; + case kCondF: + CHECK_EQ(rt, ZERO); + Bc1eqz(static_cast<FpuRegister>(rs), imm16_21); + break; + case kCondT: + CHECK_EQ(rt, ZERO); + Bc1nez(static_cast<FpuRegister>(rs), imm16_21); + break; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << cond; + UNREACHABLE(); + } +} + void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { EmitFR(0x11, 0x10, ft, fs, fd, 0x0); } @@ -721,6 +843,86 @@ void Mips64Assembler::MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { EmitFR(0x11, 0x11, ft, fs, fd, 0x1e); } +void Mips64Assembler::CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x01); +} + +void Mips64Assembler::CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x02); +} + +void Mips64Assembler::CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x03); +} + +void Mips64Assembler::CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x04); +} + +void Mips64Assembler::CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x05); +} + +void Mips64Assembler::CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x06); +} + +void Mips64Assembler::CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x07); +} + +void Mips64Assembler::CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x11); +} + +void Mips64Assembler::CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x12); +} + +void Mips64Assembler::CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x13); +} + +void Mips64Assembler::CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x01); +} + +void Mips64Assembler::CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x02); +} + +void Mips64Assembler::CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x03); +} + +void Mips64Assembler::CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x04); +} + +void Mips64Assembler::CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x05); +} + +void Mips64Assembler::CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x06); +} + +void Mips64Assembler::CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x07); +} + +void Mips64Assembler::CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x11); +} + +void Mips64Assembler::CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x12); +} + +void Mips64Assembler::CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x13); +} + void Mips64Assembler::Cvtsw(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x14, static_cast<FpuRegister>(0), fs, fd, 0x20); } @@ -925,15 +1127,6 @@ void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) { } } -void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp) { - if (IsInt<16>(value)) { - Addiu(rt, rs, value); - } else { - LoadConst32(rtmp, value); - Addu(rt, rs, rtmp); - } -} - void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) { if (IsInt<16>(value)) { Daddiu(rt, rs, value); @@ -943,177 +1136,637 @@ void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, Gp } } -// -// MIPS64R6 branches -// -// -// Unconditional (pc + 32-bit signed offset): -// -// auipc at, ofs_high -// jic at, ofs_low -// // no delay/forbidden slot -// -// -// Conditional (pc + 32-bit signed offset): -// -// b<cond>c reg, +2 // skip next 2 instructions -// auipc at, ofs_high -// jic at, ofs_low -// // no delay/forbidden slot -// -// -// Unconditional (pc + 32-bit signed offset) and link: -// -// auipc reg, ofs_high -// daddiu reg, ofs_low -// jialc reg, 0 -// // no delay/forbidden slot -// -// -// TODO: use shorter instruction sequences whenever possible. -// - -void Mips64Assembler::Bind(Label* label) { - CHECK(!label->IsBound()); - int32_t bound_pc = buffer_.Size(); +void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBits offset_size, + Mips64Assembler::Branch::Type short_type, + Mips64Assembler::Branch::Type long_type) { + type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type; +} - // Walk the list of the branches (auipc + jic pairs) referring to and preceding this label. - // Embed the previously unknown pc-relative addresses in them. - while (label->IsLinked()) { - int32_t position = label->Position(); - // Extract the branch (instruction pair) - uint32_t auipc = buffer_.Load<uint32_t>(position); - uint32_t jic = buffer_.Load<uint32_t>(position + 4); // actually, jic or daddiu +void Mips64Assembler::Branch::InitializeType(bool is_call) { + OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); + if (is_call) { + InitShortOrLong(offset_size, kCall, kLongCall); + } else if (condition_ == kUncond) { + InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + } else { + if (condition_ == kCondEQZ || condition_ == kCondNEZ) { + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch; + } else { + InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + } + } + old_type_ = type_; +} - // Extract the location of the previous pair in the list (walking the list backwards; - // the previous pair location was stored in the immediate operands of the instructions) - int32_t prev = (auipc << 16) | (jic & 0xFFFF); +bool Mips64Assembler::Branch::IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs) { + switch (condition) { + case kCondLT: + case kCondGT: + case kCondNE: + case kCondLTU: + return lhs == rhs; + default: + return false; + } +} - // Get the pc-relative address - uint32_t offset = bound_pc - position; - offset += (offset & 0x8000) << 1; // account for sign extension in jic/daddiu +bool Mips64Assembler::Branch::IsUncond(BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs) { + switch (condition) { + case kUncond: + return true; + case kCondGE: + case kCondLE: + case kCondEQ: + case kCondGEU: + return lhs == rhs; + default: + return false; + } +} - // Embed it in the two instructions - auipc = (auipc & 0xFFFF0000) | (offset >> 16); - jic = (jic & 0xFFFF0000) | (offset & 0xFFFF); +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(ZERO), + rhs_reg_(ZERO), + condition_(kUncond) { + InitializeType(false); +} + +Mips64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + Mips64Assembler::BranchCondition condition, + GpuRegister lhs_reg, + GpuRegister rhs_reg) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(lhs_reg), + rhs_reg_(rhs_reg), + condition_(condition) { + CHECK_NE(condition, kUncond); + switch (condition) { + case kCondEQ: + case kCondNE: + case kCondLT: + case kCondGE: + case kCondLE: + case kCondGT: + case kCondLTU: + case kCondGEU: + CHECK_NE(lhs_reg, ZERO); + CHECK_NE(rhs_reg, ZERO); + break; + case kCondLTZ: + case kCondGEZ: + case kCondLEZ: + case kCondGTZ: + case kCondEQZ: + case kCondNEZ: + CHECK_NE(lhs_reg, ZERO); + CHECK_EQ(rhs_reg, ZERO); + break; + case kCondF: + case kCondT: + CHECK_EQ(rhs_reg, ZERO); + break; + case kUncond: + UNREACHABLE(); + } + CHECK(!IsNop(condition, lhs_reg, rhs_reg)); + if (IsUncond(condition, lhs_reg, rhs_reg)) { + // Branch condition is always true, make the branch unconditional. + condition_ = kUncond; + } + InitializeType(false); +} + +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(indirect_reg), + rhs_reg_(ZERO), + condition_(kUncond) { + CHECK_NE(indirect_reg, ZERO); + CHECK_NE(indirect_reg, AT); + InitializeType(true); +} + +Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition( + Mips64Assembler::BranchCondition cond) { + switch (cond) { + case kCondLT: + return kCondGE; + case kCondGE: + return kCondLT; + case kCondLE: + return kCondGT; + case kCondGT: + return kCondLE; + case kCondLTZ: + return kCondGEZ; + case kCondGEZ: + return kCondLTZ; + case kCondLEZ: + return kCondGTZ; + case kCondGTZ: + return kCondLEZ; + case kCondEQ: + return kCondNE; + case kCondNE: + return kCondEQ; + case kCondEQZ: + return kCondNEZ; + case kCondNEZ: + return kCondEQZ; + case kCondLTU: + return kCondGEU; + case kCondGEU: + return kCondLTU; + case kCondF: + return kCondT; + case kCondT: + return kCondF; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << cond; + } + UNREACHABLE(); +} - // Save the adjusted instructions - buffer_.Store<uint32_t>(position, auipc); - buffer_.Store<uint32_t>(position + 4, jic); +Mips64Assembler::Branch::Type Mips64Assembler::Branch::GetType() const { + return type_; +} + +Mips64Assembler::BranchCondition Mips64Assembler::Branch::GetCondition() const { + return condition_; +} + +GpuRegister Mips64Assembler::Branch::GetLeftRegister() const { + return lhs_reg_; +} + +GpuRegister Mips64Assembler::Branch::GetRightRegister() const { + return rhs_reg_; +} + +uint32_t Mips64Assembler::Branch::GetTarget() const { + return target_; +} + +uint32_t Mips64Assembler::Branch::GetLocation() const { + return location_; +} + +uint32_t Mips64Assembler::Branch::GetOldLocation() const { + return old_location_; +} + +uint32_t Mips64Assembler::Branch::GetLength() const { + return branch_info_[type_].length; +} + +uint32_t Mips64Assembler::Branch::GetOldLength() const { + return branch_info_[old_type_].length; +} + +uint32_t Mips64Assembler::Branch::GetSize() const { + return GetLength() * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetOldSize() const { + return GetOldLength() * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetEndLocation() const { + return GetLocation() + GetSize(); +} + +uint32_t Mips64Assembler::Branch::GetOldEndLocation() const { + return GetOldLocation() + GetOldSize(); +} + +bool Mips64Assembler::Branch::IsLong() const { + switch (type_) { + // Short branches. + case kUncondBranch: + case kCondBranch: + case kCall: + return false; + // Long branches. + case kLongUncondBranch: + case kLongCondBranch: + case kLongCall: + return true; + } + UNREACHABLE(); +} + +bool Mips64Assembler::Branch::IsResolved() const { + return target_ != kUnresolved; +} + +Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSize() const { + OffsetBits offset_size = + (type_ == kCondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ)) + ? kOffset23 + : branch_info_[type_].offset_size; + return offset_size; +} + +Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSizeNeeded(uint32_t location, + uint32_t target) { + // For unresolved targets assume the shortest encoding + // (later it will be made longer if needed). + if (target == kUnresolved) + return kOffset16; + int64_t distance = static_cast<int64_t>(target) - location; + // To simplify calculations in composite branches consisting of multiple instructions + // bump up the distance by a value larger than the max byte size of a composite branch. + distance += (distance >= 0) ? kMaxBranchSize : -kMaxBranchSize; + if (IsInt<kOffset16>(distance)) + return kOffset16; + else if (IsInt<kOffset18>(distance)) + return kOffset18; + else if (IsInt<kOffset21>(distance)) + return kOffset21; + else if (IsInt<kOffset23>(distance)) + return kOffset23; + else if (IsInt<kOffset28>(distance)) + return kOffset28; + return kOffset32; +} + +void Mips64Assembler::Branch::Resolve(uint32_t target) { + target_ = target; +} + +void Mips64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) { + if (location_ > expand_location) { + location_ += delta; + } + if (!IsResolved()) { + return; // Don't know the target yet. + } + if (target_ > expand_location) { + target_ += delta; + } +} + +void Mips64Assembler::Branch::PromoteToLong() { + switch (type_) { + // Short branches. + case kUncondBranch: + type_ = kLongUncondBranch; + break; + case kCondBranch: + type_ = kLongCondBranch; + break; + case kCall: + type_ = kLongCall; + break; + default: + // Note: 'type_' is already long. + break; + } + CHECK(IsLong()); +} + +uint32_t Mips64Assembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { + // If the branch is still unresolved or already long, nothing to do. + if (IsLong() || !IsResolved()) { + return 0; + } + // Promote the short branch to long if the offset size is too small + // to hold the distance between location_ and target_. + if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) { + PromoteToLong(); + uint32_t old_size = GetOldSize(); + uint32_t new_size = GetSize(); + CHECK_GT(new_size, old_size); + return new_size - old_size; + } + // The following logic is for debugging/testing purposes. + // Promote some short branches to long when it's not really required. + if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) { + int64_t distance = static_cast<int64_t>(target_) - location_; + distance = (distance >= 0) ? distance : -distance; + if (distance >= max_short_distance) { + PromoteToLong(); + uint32_t old_size = GetOldSize(); + uint32_t new_size = GetSize(); + CHECK_GT(new_size, old_size); + return new_size - old_size; + } + } + return 0; +} + +uint32_t Mips64Assembler::Branch::GetOffsetLocation() const { + return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetOffset() const { + CHECK(IsResolved()); + uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize()); + // Calculate the byte distance between instructions and also account for + // different PC-relative origins. + uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t); + // Prepare the offset for encoding into the instruction(s). + offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift; + return offset; +} + +Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +const Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) const { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +void Mips64Assembler::Bind(Mips64Label* label) { + CHECK(!label->IsBound()); + uint32_t bound_pc = buffer_.Size(); + + // Walk the list of branches referring to and preceding this label. + // Store the previously unknown target addresses in them. + while (label->IsLinked()) { + uint32_t branch_id = label->Position(); + Branch* branch = GetBranch(branch_id); + branch->Resolve(bound_pc); + + uint32_t branch_location = branch->GetLocation(); + // Extract the location of the previous branch in the list (walking the list backwards; + // the previous branch ID was stored in the space reserved for this branch). + uint32_t prev = buffer_.Load<uint32_t>(branch_location); // On to the previous branch in the list... label->position_ = prev; } - // Now make the label object contain its own location - // (it will be used by the branches referring to and following this label) + // Now make the label object contain its own location (relative to the end of the preceding + // branch, if any; it will be used by the branches referring to and following this label). + label->prev_branch_id_plus_one_ = branches_.size(); + if (label->prev_branch_id_plus_one_) { + uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; + const Branch* branch = GetBranch(branch_id); + bound_pc -= branch->GetEndLocation(); + } label->BindTo(bound_pc); } -void Mips64Assembler::B(Label* label) { - if (label->IsBound()) { - // Branch backwards (to a preceding label), distance is known - uint32_t offset = label->Position() - buffer_.Size(); - CHECK_LE(static_cast<int32_t>(offset), 0); - offset += (offset & 0x8000) << 1; // account for sign extension in jic - Auipc(AT, offset >> 16); - Jic(AT, offset); - } else { - // Branch forward (to a following label), distance is unknown - int32_t position = buffer_.Size(); - // The first branch forward will have 0 in its pc-relative address (copied from label's - // position). It will be the terminator of the list of forward-reaching branches. - uint32_t prev = label->position_; - Auipc(AT, prev >> 16); - Jic(AT, prev); - // Now make the link object point to the location of this branch - // (this forms a linked list of branches preceding this label) - label->LinkTo(position); +uint32_t Mips64Assembler::GetLabelLocation(Mips64Label* label) const { + CHECK(label->IsBound()); + uint32_t target = label->Position(); + if (label->prev_branch_id_plus_one_) { + // Get label location based on the branch preceding it. + uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; + const Branch* branch = GetBranch(branch_id); + target += branch->GetEndLocation(); + } + return target; +} + +uint32_t Mips64Assembler::GetAdjustedPosition(uint32_t old_position) { + // We can reconstruct the adjustment by going through all the branches from the beginning + // up to the old_position. Since we expect AdjustedPosition() to be called in a loop + // with increasing old_position, we can use the data from last AdjustedPosition() to + // continue where we left off and the whole loop should be O(m+n) where m is the number + // of positions to adjust and n is the number of branches. + if (old_position < last_old_position_) { + last_position_adjustment_ = 0; + last_old_position_ = 0; + last_branch_id_ = 0; + } + while (last_branch_id_ != branches_.size()) { + const Branch* branch = GetBranch(last_branch_id_); + if (branch->GetLocation() >= old_position + last_position_adjustment_) { + break; + } + last_position_adjustment_ += branch->GetSize() - branch->GetOldSize(); + ++last_branch_id_; + } + last_old_position_ = old_position; + return old_position + last_position_adjustment_; +} + +void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) { + uint32_t length = branches_.back().GetLength(); + if (!label->IsBound()) { + // Branch forward (to a following label), distance is unknown. + // The first branch forward will contain 0, serving as the terminator of + // the list of forward-reaching branches. + Emit(label->position_); + length--; + // Now make the label object point to this branch + // (this forms a linked list of branches preceding this label). + uint32_t branch_id = branches_.size() - 1; + label->LinkTo(branch_id); + } + // Reserve space for the branch. + while (length--) { + Nop(); } } -void Mips64Assembler::Jalr(Label* label, GpuRegister indirect_reg) { - if (label->IsBound()) { - // Branch backwards (to a preceding label), distance is known - uint32_t offset = label->Position() - buffer_.Size(); - CHECK_LE(static_cast<int32_t>(offset), 0); - offset += (offset & 0x8000) << 1; // account for sign extension in daddiu - Auipc(indirect_reg, offset >> 16); - Daddiu(indirect_reg, indirect_reg, offset); - Jialc(indirect_reg, 0); - } else { - // Branch forward (to a following label), distance is unknown - int32_t position = buffer_.Size(); - // The first branch forward will have 0 in its pc-relative address (copied from label's - // position). It will be the terminator of the list of forward-reaching branches. - uint32_t prev = label->position_; - Auipc(indirect_reg, prev >> 16); - Daddiu(indirect_reg, indirect_reg, prev); - Jialc(indirect_reg, 0); - // Now make the link object point to the location of this branch - // (this forms a linked list of branches preceding this label) - label->LinkTo(position); +void Mips64Assembler::Buncond(Mips64Label* label) { + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::Bcond(Mips64Label* label, + BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs) { + // If lhs = rhs, this can be a NOP. + if (Branch::IsNop(condition, lhs, rhs)) { + return; + } + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::Call(Mips64Label* label, GpuRegister indirect_reg) { + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, indirect_reg); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::PromoteBranches() { + // Promote short branches to long as necessary. + bool changed; + do { + changed = false; + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + uint32_t delta = branch.PromoteIfNeeded(); + // If this branch has been promoted and needs to expand in size, + // relocate all branches by the expansion size. + if (delta) { + changed = true; + uint32_t expand_location = branch.GetLocation(); + for (auto& branch2 : branches_) { + branch2.Relocate(expand_location, delta); + } + } + } + } while (changed); + + // Account for branch expansion by resizing the code buffer + // and moving the code in it to its final location. + size_t branch_count = branches_.size(); + if (branch_count > 0) { + // Resize. + Branch& last_branch = branches_[branch_count - 1]; + uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation(); + uint32_t old_size = buffer_.Size(); + buffer_.Resize(old_size + size_delta); + // Move the code residing between branch placeholders. + uint32_t end = old_size; + for (size_t i = branch_count; i > 0; ) { + Branch& branch = branches_[--i]; + uint32_t size = end - branch.GetOldEndLocation(); + buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size); + end = branch.GetOldLocation(); + } + } +} + +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. +const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[] = { + // Short branches. + { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kUncondBranch + { 2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kCondBranch + // Exception: kOffset23 for beqzc/bnezc + { 2, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kCall + // Long branches. + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongUncondBranch + { 3, 1, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCondBranch + { 3, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCall +}; + +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. +void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { + CHECK(overwriting_); + overwrite_location_ = branch->GetLocation(); + uint32_t offset = branch->GetOffset(); + BranchCondition condition = branch->GetCondition(); + GpuRegister lhs = branch->GetLeftRegister(); + GpuRegister rhs = branch->GetRightRegister(); + switch (branch->GetType()) { + // Short branches. + case Branch::kUncondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Bc(offset); + break; + case Branch::kCondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcondc(condition, lhs, rhs, offset); + Nop(); // TODO: improve by filling the forbidden/delay slot. + break; + case Branch::kCall: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Addiupc(lhs, offset); + Jialc(lhs, 0); + break; + + // Long branches. + case Branch::kLongUncondBranch: + offset += (offset & 0x8000) << 1; // Account for sign extension in jic. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jic(AT, Low16Bits(offset)); + break; + case Branch::kLongCondBranch: + EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2); + offset += (offset & 0x8000) << 1; // Account for sign extension in jic. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jic(AT, Low16Bits(offset)); + break; + case Branch::kLongCall: + offset += (offset & 0x8000) << 1; // Account for sign extension in daddiu. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(lhs, High16Bits(offset)); + Daddiu(lhs, lhs, Low16Bits(offset)); + Jialc(lhs, 0); + break; } + CHECK_EQ(overwrite_location_, branch->GetEndLocation()); + CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize)); +} + +void Mips64Assembler::Bc(Mips64Label* label) { + Buncond(label); +} + +void Mips64Assembler::Jialc(Mips64Label* label, GpuRegister indirect_reg) { + Call(label, indirect_reg); +} + +void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLT, rs, rt); +} + +void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLTZ, rt); } -void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Label* label) { - Bgec(rs, rt, 2); - B(label); +void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGTZ, rt); } -void Mips64Assembler::Bltzc(GpuRegister rt, Label* label) { - Bgezc(rt, 2); - B(label); +void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGE, rs, rt); } -void Mips64Assembler::Bgtzc(GpuRegister rt, Label* label) { - Blezc(rt, 2); - B(label); +void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGEZ, rt); } -void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Label* label) { - Bltc(rs, rt, 2); - B(label); +void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLEZ, rt); } -void Mips64Assembler::Bgezc(GpuRegister rt, Label* label) { - Bltzc(rt, 2); - B(label); +void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLTU, rs, rt); } -void Mips64Assembler::Blezc(GpuRegister rt, Label* label) { - Bgtzc(rt, 2); - B(label); +void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGEU, rs, rt); } -void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Label* label) { - Bgeuc(rs, rt, 2); - B(label); +void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondEQ, rs, rt); } -void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Label* label) { - Bltuc(rs, rt, 2); - B(label); +void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondNE, rs, rt); } -void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Label* label) { - Bnec(rs, rt, 2); - B(label); +void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label) { + Bcond(label, kCondEQZ, rs); } -void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Label* label) { - Beqc(rs, rt, 2); - B(label); +void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) { + Bcond(label, kCondNEZ, rs); } -void Mips64Assembler::Beqzc(GpuRegister rs, Label* label) { - Bnezc(rs, 2); - B(label); +void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label) { + Bcond(label, kCondF, static_cast<GpuRegister>(ft), ZERO); } -void Mips64Assembler::Bnezc(GpuRegister rs, Label* label) { - Beqzc(rs, 2); - B(label); +void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) { + Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO); } void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, @@ -1256,6 +1909,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); // Increase frame to required size. IncreaseFrameSize(frame_size); @@ -1298,6 +1952,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Mips64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); cfi_.RememberState(); // Pop callee saves and return address @@ -1316,6 +1971,7 @@ void Mips64Assembler::RemoveFrame(size_t frame_size, // Then jump to the return address. Jr(RA); + Nop(); // The CFI should be restored for any code that follows the exit block. cfi_.RestoreState(); @@ -1324,12 +1980,14 @@ void Mips64Assembler::RemoveFrame(size_t frame_size, void Mips64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); + DCHECK(!overwriting_); Daddiu64(SP, SP, static_cast<int32_t>(-adjust)); cfi_.AdjustCFAOffset(adjust); } void Mips64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); + DCHECK(!overwriting_); Daddiu64(SP, SP, static_cast<int32_t>(adjust)); cfi_.AdjustCFAOffset(-adjust); } @@ -1379,17 +2037,7 @@ void Mips64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value()); } -void Mips64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister mscratch) { - Mips64ManagedRegister scratch = mscratch.AsMips64(); - CHECK(scratch.IsGpuRegister()) << scratch; - // TODO: it's unclear wether 32 or 64 bits need to be stored (Arm64 and x86/x64 disagree?). - // Is this function even referenced anywhere else in the code? - LoadConst32(scratch.AsGpuRegister(), imm); - StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value()); -} - -void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, +void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); @@ -1398,7 +2046,7 @@ void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value()); } -void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) { +void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) { StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value()); } @@ -1415,7 +2063,9 @@ void Mips64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) return EmitLoad(mdest, SP, src.Int32Value(), size); } -void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) { +void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> src, + size_t size) { return EmitLoad(mdest, S1, src.Int32Value(), size); } @@ -1449,18 +2099,20 @@ void Mips64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, } void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, - ThreadOffset<8> offs) { + ThreadOffset<kMipsDoublewordSize> offs) { Mips64ManagedRegister dest = mdest.AsMips64(); CHECK(dest.IsGpuRegister()); LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value()); } -void Mips64Assembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no sign extension necessary for mips"; +void Mips64Assembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No sign extension necessary for MIPS64"; } -void Mips64Assembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips"; +void Mips64Assembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No zero extension necessary for MIPS64"; } void Mips64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) { @@ -1492,7 +2144,7 @@ void Mips64Assembler::CopyRef(FrameOffset dest, FrameOffset src, } void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, - ThreadOffset<8> thr_offs, + ThreadOffset<kMipsDoublewordSize> thr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; @@ -1500,7 +2152,7 @@ void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value()); } -void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs, +void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); @@ -1561,9 +2213,12 @@ void Mips64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameO } } -void Mips64Assembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, + FrameOffset src_base ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, @@ -1584,15 +2239,18 @@ void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, } } -void Mips64Assembler::Copy(FrameOffset /*dest*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset -/*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, + Offset dest_offset ATTRIBUTE_UNUSED, + FrameOffset src ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } -void Mips64Assembler::MemoryBarrier(ManagedRegister) { +void Mips64Assembler::MemoryBarrier(ManagedRegister mreg ATTRIBUTE_UNUSED) { // TODO: sync? - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, @@ -1604,7 +2262,7 @@ void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg; CHECK(out_reg.IsGpuRegister()) << out_reg; if (null_allowed) { - Label null_arg; + Mips64Label null_arg; // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is // the address in the handle scope holding the reference. // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset) @@ -1631,7 +2289,7 @@ void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off, Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; if (null_allowed) { - Label null_arg; + Mips64Label null_arg; LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is @@ -1653,7 +2311,7 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, Mips64ManagedRegister in_reg = min_reg.AsMips64(); CHECK(out_reg.IsGpuRegister()) << out_reg; CHECK(in_reg.IsGpuRegister()) << in_reg; - Label null_arg; + Mips64Label null_arg; if (!out_reg.Equals(in_reg)) { LoadConst32(out_reg.AsGpuRegister(), 0); } @@ -1663,11 +2321,13 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, Bind(&null_arg); } -void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { +void Mips64Assembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { // TODO: not validating references } -void Mips64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { +void Mips64Assembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { // TODO: not validating references } @@ -1679,6 +2339,7 @@ void Mips64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), base.AsGpuRegister(), offset.Int32Value()); Jalr(scratch.AsGpuRegister()); + Nop(); // TODO: place reference map on call } @@ -1691,11 +2352,13 @@ void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscr LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), scratch.AsGpuRegister(), offset.Int32Value()); Jalr(scratch.AsGpuRegister()); + Nop(); // TODO: place reference map on call } -void Mips64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*mscratch*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::GetCurrentThread(ManagedRegister tr) { @@ -1703,37 +2366,39 @@ void Mips64Assembler::GetCurrentThread(ManagedRegister tr) { } void Mips64Assembler::GetCurrentThread(FrameOffset offset, - ManagedRegister /*mscratch*/) { + ManagedRegister mscratch ATTRIBUTE_UNUSED) { StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value()); } void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) { Mips64ManagedRegister scratch = mscratch.AsMips64(); - Mips64ExceptionSlowPath* slow = new Mips64ExceptionSlowPath(scratch, stack_adjust); - buffer_.EnqueueSlowPath(slow); - LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), - S1, Thread::ExceptionOffset<8>().Int32Value()); - Bnezc(scratch.AsGpuRegister(), slow->Entry()); -} - -void Mips64ExceptionSlowPath::Emit(Assembler* sasm) { - Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm); -#define __ sp_asm-> - __ Bind(&entry_); - if (stack_adjust_ != 0) { // Fix up the frame. - __ DecreaseFrameSize(stack_adjust_); + exception_blocks_.emplace_back(scratch, stack_adjust); + LoadFromOffset(kLoadDoubleword, + scratch.AsGpuRegister(), + S1, + Thread::ExceptionOffset<kMipsDoublewordSize>().Int32Value()); + Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry()); +} + +void Mips64Assembler::EmitExceptionPoll(Mips64ExceptionSlowPath* exception) { + Bind(exception->Entry()); + if (exception->stack_adjust_ != 0) { // Fix up the frame. + DecreaseFrameSize(exception->stack_adjust_); } - // Pass exception object as argument - // Don't care about preserving A0 as this call won't return - __ Move(A0, scratch_.AsGpuRegister()); + // Pass exception object as argument. + // Don't care about preserving A0 as this call won't return. + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); + Move(A0, exception->scratch_.AsGpuRegister()); // Set up call to Thread::Current()->pDeliverException - __ LoadFromOffset(kLoadDoubleword, T9, S1, - QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()); - // TODO: check T9 usage - __ Jr(T9); + LoadFromOffset(kLoadDoubleword, + T9, + S1, + QUICK_ENTRYPOINT_OFFSET(kMipsDoublewordSize, pDeliverException).Int32Value()); + Jr(T9); + Nop(); + // Call never returns - __ Break(); -#undef __ + Break(); } } // namespace mips64 diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 42962bca20..883f013f87 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -17,18 +17,22 @@ #ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ #define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ +#include <utility> #include <vector> #include "base/macros.h" #include "constants_mips64.h" #include "globals.h" #include "managed_register_mips64.h" -#include "utils/assembler.h" #include "offsets.h" +#include "utils/assembler.h" +#include "utils/label.h" namespace art { namespace mips64 { +static constexpr size_t kMipsDoublewordSize = 8; + enum LoadOperandType { kLoadSignedByte, kLoadUnsignedByte, @@ -60,10 +64,57 @@ enum FPClassMaskType { kPositiveZero = 0x200, }; +class Mips64Label : public Label { + public: + Mips64Label() : prev_branch_id_plus_one_(0) {} + + Mips64Label(Mips64Label&& src) + : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {} + + private: + uint32_t prev_branch_id_plus_one_; // To get distance from preceding branch, if any. + + friend class Mips64Assembler; + DISALLOW_COPY_AND_ASSIGN(Mips64Label); +}; + +// Slowpath entered when Thread::Current()->_exception is non-null. +class Mips64ExceptionSlowPath { + public: + explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) + : scratch_(scratch), stack_adjust_(stack_adjust) {} + + Mips64ExceptionSlowPath(Mips64ExceptionSlowPath&& src) + : scratch_(src.scratch_), + stack_adjust_(src.stack_adjust_), + exception_entry_(std::move(src.exception_entry_)) {} + + private: + Mips64Label* Entry() { return &exception_entry_; } + const Mips64ManagedRegister scratch_; + const size_t stack_adjust_; + Mips64Label exception_entry_; + + friend class Mips64Assembler; + DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath); +}; + class Mips64Assembler FINAL : public Assembler { public: - Mips64Assembler() {} - virtual ~Mips64Assembler() {} + Mips64Assembler() + : overwriting_(false), + overwrite_location_(0), + last_position_adjustment_(0), + last_old_position_(0), + last_branch_id_(0) { + cfi().DelayEmittingAdvancePCs(); + } + + virtual ~Mips64Assembler() { + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + } + } // Emit Machine Instructions. void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt); @@ -156,14 +207,12 @@ class Mips64Assembler FINAL : public Assembler { void Dclz(GpuRegister rd, GpuRegister rs); void Dclo(GpuRegister rd, GpuRegister rs); - void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16); - void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16); - void J(uint32_t addr26); - void Jal(uint32_t addr26); void Jalr(GpuRegister rd, GpuRegister rs); void Jalr(GpuRegister rs); void Jr(GpuRegister rs); void Auipc(GpuRegister rs, uint16_t imm16); + void Addiupc(GpuRegister rs, uint32_t imm19); + void Bc(uint32_t imm26); void Jic(GpuRegister rt, uint16_t imm16); void Jialc(GpuRegister rt, uint16_t imm16); void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16); @@ -178,6 +227,8 @@ class Mips64Assembler FINAL : public Assembler { void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16); void Beqzc(GpuRegister rs, uint32_t imm21); void Bnezc(GpuRegister rs, uint32_t imm21); + void Bc1eqz(FpuRegister ft, uint16_t imm16); + void Bc1nez(FpuRegister ft, uint16_t imm16); void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft); void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft); @@ -217,6 +268,26 @@ class Mips64Assembler FINAL : public Assembler { void MinD(FpuRegister fd, FpuRegister fs, FpuRegister ft); void MaxS(FpuRegister fd, FpuRegister fs, FpuRegister ft); void MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft); void Cvtsw(FpuRegister fd, FpuRegister fs); void Cvtdw(FpuRegister fd, FpuRegister fs); @@ -240,32 +311,36 @@ class Mips64Assembler FINAL : public Assembler { void Clear(GpuRegister rd); void Not(GpuRegister rd, GpuRegister rs); - // Higher level composite instructions + // Higher level composite instructions. void LoadConst32(GpuRegister rd, int32_t value); void LoadConst64(GpuRegister rd, int64_t value); // MIPS64 - void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT); void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64 - void Bind(Label* label) OVERRIDE; - void Jump(Label* label) OVERRIDE { - B(label); + void Bind(Label* label) OVERRIDE { + Bind(down_cast<Mips64Label*>(label)); + } + void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64"; } - void B(Label* label); - void Jalr(Label* label, GpuRegister indirect_reg = RA); - // TODO: implement common for R6 and non-R6 interface for conditional branches? - void Bltc(GpuRegister rs, GpuRegister rt, Label* label); - void Bltzc(GpuRegister rt, Label* label); - void Bgtzc(GpuRegister rt, Label* label); - void Bgec(GpuRegister rs, GpuRegister rt, Label* label); - void Bgezc(GpuRegister rt, Label* label); - void Blezc(GpuRegister rt, Label* label); - void Bltuc(GpuRegister rs, GpuRegister rt, Label* label); - void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label); - void Beqc(GpuRegister rs, GpuRegister rt, Label* label); - void Bnec(GpuRegister rs, GpuRegister rt, Label* label); - void Beqzc(GpuRegister rs, Label* label); - void Bnezc(GpuRegister rs, Label* label); + + void Bind(Mips64Label* label); + void Bc(Mips64Label* label); + void Jialc(Mips64Label* label, GpuRegister indirect_reg); + void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bltzc(GpuRegister rt, Mips64Label* label); + void Bgtzc(GpuRegister rt, Mips64Label* label); + void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bgezc(GpuRegister rt, Mips64Label* label); + void Blezc(GpuRegister rt, Mips64Label* label); + void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Beqzc(GpuRegister rs, Mips64Label* label); + void Bnezc(GpuRegister rs, Mips64Label* label); + void Bc1eqz(FpuRegister ft, Mips64Label* label); + void Bc1nez(FpuRegister ft, Mips64Label* label); void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); @@ -277,43 +352,42 @@ class Mips64Assembler FINAL : public Assembler { void Emit(uint32_t value); // - // Overridden common assembler high-level functionality + // Overridden common assembler high-level functionality. // - // Emit code that will create an activation on the stack + // Emit code that will create an activation on the stack. void BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; - // Emit code that will remove an activation from the stack + // Emit code that will remove an activation from the stack. void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE; void IncreaseFrameSize(size_t adjust) OVERRIDE; void DecreaseFrameSize(size_t adjust) OVERRIDE; - // Store routines + // Store routines. void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE; void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE; void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE; void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE; - void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister mscratch) OVERRIDE; - - void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, + void StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; - void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE; + void StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) OVERRIDE; void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off, ManagedRegister mscratch) OVERRIDE; - // Load routines + // Load routines. void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE; - void LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) OVERRIDE; + void LoadFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> src, + size_t size) OVERRIDE; void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; @@ -322,15 +396,16 @@ class Mips64Assembler FINAL : public Assembler { void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE; - void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) OVERRIDE; + void LoadRawPtrFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> offs) OVERRIDE; - // Copying routines + // Copying routines. void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE; - void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs, + void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMipsDoublewordSize> thr_offs, ManagedRegister mscratch) OVERRIDE; - void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, + void CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE; @@ -354,13 +429,13 @@ class Mips64Assembler FINAL : public Assembler { void MemoryBarrier(ManagedRegister) OVERRIDE; - // Sign extension + // Sign extension. void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; - // Zero extension + // Zero extension. void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; - // Exploit fast access in managed code to Thread::Current() + // Exploit fast access in managed code to Thread::Current(). void GetCurrentThread(ManagedRegister tr) OVERRIDE; void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE; @@ -376,7 +451,7 @@ class Mips64Assembler FINAL : public Assembler { void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister mscratch, bool null_allowed) OVERRIDE; - // src holds a handle scope entry (Object**) load this into dst + // src holds a handle scope entry (Object**) load this into dst. void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; // Heap::VerifyObject on src. In some cases (such as a reference to this) we @@ -384,37 +459,255 @@ class Mips64Assembler FINAL : public Assembler { void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; - // Call to address held at [base+offset] + // Call to address held at [base+offset]. void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE; void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE; - void CallFromThread64(ThreadOffset<8> offset, ManagedRegister mscratch) OVERRIDE; + void CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset, + ManagedRegister mscratch) OVERRIDE; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE; + // Emit slow paths queued during assembly and promote short branches to long if needed. + void FinalizeCode() OVERRIDE; + + // Emit branches and finalize all instructions. + void FinalizeInstructions(const MemoryRegion& region); + + // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS64, + // must be used instead of Mips64Label::GetPosition()). + uint32_t GetLabelLocation(Mips64Label* label) const; + + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position); + + enum BranchCondition { + kCondLT, + kCondGE, + kCondLE, + kCondGT, + kCondLTZ, + kCondGEZ, + kCondLEZ, + kCondGTZ, + kCondEQ, + kCondNE, + kCondEQZ, + kCondNEZ, + kCondLTU, + kCondGEU, + kCondF, // Floating-point predicate false. + kCondT, // Floating-point predicate true. + kUncond, + }; + friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs); + private: + class Branch { + public: + enum Type { + // Short branches. + kUncondBranch, + kCondBranch, + kCall, + // Long branches. + kLongUncondBranch, + kLongCondBranch, + kLongCall, + }; + + // Bit sizes of offsets defined as enums to minimize chance of typos. + enum OffsetBits { + kOffset16 = 16, + kOffset18 = 18, + kOffset21 = 21, + kOffset23 = 23, + kOffset28 = 28, + kOffset32 = 32, + }; + + static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_ + static constexpr int32_t kMaxBranchLength = 32; + static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t); + + struct BranchInfo { + // Branch length as a number of 4-byte-long instructions. + uint32_t length; + // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's + // PC-relative offset (or its most significant 16-bit half, which goes first). + uint32_t instr_offset; + // Different MIPS instructions with PC-relative offsets apply said offsets to slightly + // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte + // instructions) from the instruction containing the offset. + uint32_t pc_org; + // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is + // an exception: use kOffset23 for beqzc/bnezc). + OffsetBits offset_size; + // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift + // count. + int offset_shift; + }; + static const BranchInfo branch_info_[/* Type */]; + + // Unconditional branch. + Branch(uint32_t location, uint32_t target); + // Conditional branch. + Branch(uint32_t location, + uint32_t target, + BranchCondition condition, + GpuRegister lhs_reg, + GpuRegister rhs_reg = ZERO); + // Call (branch and link) that stores the target address in a given register (i.e. T9). + Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg); + + // Some conditional branches with lhs = rhs are effectively NOPs, while some + // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs. + // So, we need a way to identify such branches in order to emit no instructions for them + // or change them to unconditional. + static bool IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs); + static bool IsUncond(BranchCondition condition, GpuRegister lhs, GpuRegister rhs); + + static BranchCondition OppositeCondition(BranchCondition cond); + + Type GetType() const; + BranchCondition GetCondition() const; + GpuRegister GetLeftRegister() const; + GpuRegister GetRightRegister() const; + uint32_t GetTarget() const; + uint32_t GetLocation() const; + uint32_t GetOldLocation() const; + uint32_t GetLength() const; + uint32_t GetOldLength() const; + uint32_t GetSize() const; + uint32_t GetOldSize() const; + uint32_t GetEndLocation() const; + uint32_t GetOldEndLocation() const; + bool IsLong() const; + bool IsResolved() const; + + // Returns the bit size of the signed offset that the branch instruction can handle. + OffsetBits GetOffsetSize() const; + + // Calculates the distance between two byte locations in the assembler buffer and + // returns the number of bits needed to represent the distance as a signed integer. + // + // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc), + // and 26 (bc) bits, which are additionally shifted left 2 positions at run time. + // + // Composite branches (made of several instructions) with longer reach have 32-bit + // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first). + // The composite branches cover the range of PC + ~+/-2GB. The range is not end-to-end, + // however. Consider the following implementation of a long unconditional branch, for + // example: + // + // auipc at, offset_31_16 // at = pc + sign_extend(offset_31_16) << 16 + // jic at, offset_15_0 // pc = at + sign_extend(offset_15_0) + // + // Both of the above instructions take 16-bit signed offsets as immediate operands. + // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000 + // due to sign extension. This must be compensated for by incrementing offset_31_16 + // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is + // 0x7FFF, adding 1 will overflow the positive offset into the negative range. + // Therefore, the long branch range is something like from PC - 0x80000000 to + // PC + 0x7FFF7FFF, IOW, shorter by 32KB on one side. + // + // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special + // case with the addiu instruction and a 16 bit offset. + static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target); + + // Resolve a branch when the target is known. + void Resolve(uint32_t target); + + // Relocate a branch by a given delta if needed due to expansion of this or another + // branch at a given location by this delta (just changes location_ and target_). + void Relocate(uint32_t expand_location, uint32_t delta); + + // If the branch is short, changes its type to long. + void PromoteToLong(); + + // If necessary, updates the type by promoting a short branch to a long branch + // based on the branch location and target. Returns the amount (in bytes) by + // which the branch size has increased. + // max_short_distance caps the maximum distance between location_ and target_ + // that is allowed for short branches. This is for debugging/testing purposes. + // max_short_distance = 0 forces all short branches to become long. + // Use the implicit default argument when not debugging/testing. + uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); + + // Returns the location of the instruction(s) containing the offset. + uint32_t GetOffsetLocation() const; + + // Calculates and returns the offset ready for encoding in the branch instruction(s). + uint32_t GetOffset() const; + + private: + // Completes branch construction by determining and recording its type. + void InitializeType(bool is_call); + // Helper for the above. + void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); + + uint32_t old_location_; // Offset into assembler buffer in bytes. + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + + GpuRegister lhs_reg_; // Left-hand side register in conditional branches or + // indirect call register. + GpuRegister rhs_reg_; // Right-hand side register in conditional branches. + BranchCondition condition_; // Condition for conditional branches. + + Type type_; // Current type of the branch. + Type old_type_; // Initial type of the branch. + }; + friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs); + friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs); + void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct); void EmitRsd(int opcode, GpuRegister rs, GpuRegister rd, int shamt, int funct); void EmitRtd(int opcode, GpuRegister rt, GpuRegister rd, int shamt, int funct); void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm); void EmitI21(int opcode, GpuRegister rs, uint32_t imm21); - void EmitJ(int opcode, uint32_t addr26); + void EmitI26(int opcode, uint32_t imm26); void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct); void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm); + void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21); - DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); -}; + void Buncond(Mips64Label* label); + void Bcond(Mips64Label* label, + BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs = ZERO); + void Call(Mips64Label* label, GpuRegister indirect_reg); + void FinalizeLabeledBranch(Mips64Label* label); -// Slowpath entered when Thread::Current()->_exception is non-null -class Mips64ExceptionSlowPath FINAL : public SlowPath { - public: - Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) - : scratch_(scratch), stack_adjust_(stack_adjust) {} - virtual void Emit(Assembler *sp_asm) OVERRIDE; - private: - const Mips64ManagedRegister scratch_; - const size_t stack_adjust_; + Branch* GetBranch(uint32_t branch_id); + const Branch* GetBranch(uint32_t branch_id) const; + + void PromoteBranches(); + void EmitBranch(Branch* branch); + void EmitBranches(); + void PatchCFI(); + + // Emits exception block. + void EmitExceptionPoll(Mips64ExceptionSlowPath* exception); + + // List of exception blocks to generate at the end of the code cache. + std::vector<Mips64ExceptionSlowPath> exception_blocks_; + + std::vector<Branch> branches_; + + // Whether appending instructions at the end of the buffer or overwriting the existing ones. + bool overwriting_; + // The current overwrite location. + uint32_t overwrite_location_; + + // Data for AdjustedPosition(), see the description there. + uint32_t last_position_adjustment_; + uint32_t last_old_position_; + uint32_t last_branch_id_; + + DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); }; } // namespace mips64 diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 4413906fd7..bac4375b35 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -24,6 +24,8 @@ #include "base/stl_util.h" #include "utils/assembler_test.h" +#define __ GetAssembler()-> + namespace art { struct MIPS64CpuRegisterCompare { @@ -48,8 +50,26 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return "mips64"; } + std::string GetAssemblerCmdName() OVERRIDE { + // We assemble and link for MIPS64R6. See GetAssemblerParameters() for details. + return "gcc"; + } + std::string GetAssemblerParameters() OVERRIDE { - return " --no-warn -march=mips64r6"; + // We assemble and link for MIPS64R6. The reason is that object files produced for MIPS64R6 + // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative + // branches in the .text section and so they require a relocation pass (there's a relocation + // section, .rela.text, that has the needed info to fix up the branches). + return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib"; + } + + void Pad(std::vector<uint8_t>& data) OVERRIDE { + // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple + // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't + // pad, so, in order for two assembler outputs to match, we need to match the padding as well. + // NOP is encoded as four zero bytes on MIPS. + size_t pad_size = RoundUp(data.size(), 16u) - data.size(); + data.insert(data.end(), pad_size, 0); } std::string GetDisassembleParameters() OVERRIDE { @@ -182,6 +202,71 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return secondary_register_names_[reg]; } + std::string RepeatInsn(size_t count, const std::string& insn) { + std::string result; + for (; count != 0u; --count) { + result += insn; + } + return result; + } + + void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, + mips64::Mips64Label*), + std::string instr_name) { + mips64::Mips64Label label; + (Base::GetAssembler()->*f)(mips64::A0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + (Base::GetAssembler()->*f)(mips64::A1, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a1, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + + void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, + mips64::GpuRegister, + mips64::Mips64Label*), + std::string instr_name) { + mips64::Mips64Label label; + (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, $a1, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a2, $a3, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + private: std::vector<mips64::GpuRegister*> registers_; std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_; @@ -194,7 +279,6 @@ TEST_F(AssemblerMIPS64Test, Toolchain) { EXPECT_TRUE(CheckTools()); } - /////////////////// // FP Operations // /////////////////// @@ -319,6 +403,106 @@ TEST_F(AssemblerMIPS64Test, MaxD) { DriverStr(RepeatFFF(&mips64::Mips64Assembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d"); } +TEST_F(AssemblerMIPS64Test, CmpUnS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnS, "cmp.un.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.un.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpEqS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqS, "cmp.eq.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.eq.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUeqS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqS, "cmp.ueq.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ueq.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpLtS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtS, "cmp.lt.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.lt.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUltS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltS, "cmp.ult.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ult.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpLeS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeS, "cmp.le.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.le.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUleS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleS, "cmp.ule.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ule.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpOrS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrS, "cmp.or.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.or.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUneS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneS, "cmp.une.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.une.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpNeS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeS, "cmp.ne.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ne.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUnD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnD, "cmp.un.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.un.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpEqD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqD, "cmp.eq.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.eq.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUeqD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqD, "cmp.ueq.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ueq.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpLtD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtD, "cmp.lt.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.lt.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUltD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltD, "cmp.ult.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ult.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpLeD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeD, "cmp.le.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.le.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUleD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleD, "cmp.ule.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ule.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpOrD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrD, "cmp.or.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.or.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUneD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneD, "cmp.une.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.une.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpNeD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeD, "cmp.ne.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ne.d"); +} + TEST_F(AssemblerMIPS64Test, CvtDL) { DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "cvt.d.l"); } @@ -348,7 +532,255 @@ TEST_F(AssemblerMIPS64Test, CvtSW) { //////////////// TEST_F(AssemblerMIPS64Test, Jalr) { - DriverStr(RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); + DriverStr(".set noreorder\n" + + RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); +} + +TEST_F(AssemblerMIPS64Test, Jialc) { + mips64::Mips64Label label1, label2; + __ Jialc(&label1, mips64::T9); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Jialc(&label2, mips64::T9); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Jialc(&label1, mips64::T9); + + std::string expected = + ".set noreorder\n" + "lapc $t9, 1f\n" + "jialc $t9, 0\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "lapc $t9, 2f\n" + "jialc $t9, 0\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "lapc $t9, 1b\n" + "jialc $t9, 0\n"; + DriverStr(expected, "Jialc"); +} + +TEST_F(AssemblerMIPS64Test, LongJialc) { + mips64::Mips64Label label1, label2; + __ Jialc(&label1, mips64::T9); + constexpr uint32_t kAdduCount1 = (1u << 18) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Jialc(&label2, mips64::T9); + constexpr uint32_t kAdduCount2 = (1u << 18) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Jialc(&label1, mips64::T9); + + uint32_t offset_forward1 = 3 + kAdduCount1; // 3: account for auipc, daddiu and jic. + offset_forward1 <<= 2; + offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in daddiu. + + uint32_t offset_forward2 = 3 + kAdduCount2; // 3: account for auipc, daddiu and jic. + offset_forward2 <<= 2; + offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in daddiu. + + uint32_t offset_back = -(3 + kAdduCount2); // 3: account for auipc, daddiu and jic. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in daddiu. + + std::ostringstream oss; + oss << + ".set noreorder\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_forward1) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward1) << "\n" + "jialc $t9, 0\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "1:\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_forward2) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward2) << "\n" + "jialc $t9, 0\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "2:\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_back) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "jialc $t9, 0\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongJialc"); +} + +TEST_F(AssemblerMIPS64Test, Bc) { + mips64::Mips64Label label1, label2; + __ Bc(&label1); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Bc(&label2); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Bc(&label1); + + std::string expected = + ".set noreorder\n" + "bc 1f\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "bc 2f\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "bc 1b\n"; + DriverStr(expected, "Bc"); +} + +TEST_F(AssemblerMIPS64Test, Beqzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc"); +} + +TEST_F(AssemblerMIPS64Test, Bnezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc"); +} + +TEST_F(AssemblerMIPS64Test, Bltzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc"); +} + +TEST_F(AssemblerMIPS64Test, Bgezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc"); +} + +TEST_F(AssemblerMIPS64Test, Blezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc"); +} + +TEST_F(AssemblerMIPS64Test, Bgtzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc"); +} + +TEST_F(AssemblerMIPS64Test, Beqc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc"); +} + +TEST_F(AssemblerMIPS64Test, Bnec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec"); +} + +TEST_F(AssemblerMIPS64Test, Bltc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc"); +} + +TEST_F(AssemblerMIPS64Test, Bgec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec"); +} + +TEST_F(AssemblerMIPS64Test, Bltuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc"); +} + +TEST_F(AssemblerMIPS64Test, Bgeuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc"); +} + +TEST_F(AssemblerMIPS64Test, Bc1eqz) { + mips64::Mips64Label label; + __ Bc1eqz(mips64::F0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bc1eqz(mips64::F31, &label); + + std::string expected = + ".set noreorder\n" + "bc1eqz $f0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "bc1eqz $f31, 1b\n" + "nop\n"; + DriverStr(expected, "Bc1eqz"); +} + +TEST_F(AssemblerMIPS64Test, Bc1nez) { + mips64::Mips64Label label; + __ Bc1nez(mips64::F0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bc1nez(mips64::F31, &label); + + std::string expected = + ".set noreorder\n" + "bc1nez $f0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "bc1nez $f31, 1b\n" + "nop\n"; + DriverStr(expected, "Bc1nez"); +} + +TEST_F(AssemblerMIPS64Test, LongBeqc) { + mips64::Mips64Label label; + __ Beqc(mips64::A0, mips64::A1, &label); + constexpr uint32_t kAdduCount1 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr uint32_t kAdduCount2 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Beqc(mips64::A2, mips64::A3, &label); + + uint32_t offset_forward = 2 + kAdduCount1; // 2: account for auipc and jic. + offset_forward <<= 2; + offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic. + + uint32_t offset_back = -(kAdduCount2 + 1); // 1: account for bnec. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic. + + std::ostringstream oss; + oss << + ".set noreorder\n" + "bnec $a0, $a1, 1f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" + "1:\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "2:\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "bnec $a2, $a3, 3f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "3:\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBeqc"); } ////////// |