 compiler/utils/assembler_test_base.h             |  12
 compiler/utils/label.h                           |   6
 compiler/utils/riscv64/assembler_riscv64.cc      | 905
 compiler/utils/riscv64/assembler_riscv64.h       | 357
 compiler/utils/riscv64/assembler_riscv64_test.cc | 665
 runtime/arch/riscv64/registers_riscv64.h         |   3
 6 files changed, 1925 insertions(+), 23 deletions(-)
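This patch adds label-based branches with automatic short/medium/long promotion, PC-relative literals, and jump tables to the RISC-V 64 assembler. A minimal usage sketch of the new API (a sketch only: the ArenaAllocator setup and surrounding compiler plumbing are assumed; the register names come from the patched sources):

    riscv64::Riscv64Assembler assembler(&arena);
    riscv64::Riscv64Label loop, done;
    riscv64::Literal* lit = assembler.NewLiteral<uint32_t>(0x12345678u);
    assembler.Bind(&loop);
    assembler.Beqz(A0, &done);  // Forward branch; resolved when `done` is bound.
    assembler.Lw(A1, lit);      // PC-relative literal load (AUIPC + LW after finalization).
    assembler.J(&loop);         // Backward branch; target already known.
    assembler.Bind(&done);
    assembler.FinalizeCode();   // Reserves jump table space, emits literals, promotes branches.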
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h index 8b76643e98..fb950f8237 100644 --- a/compiler/utils/assembler_test_base.h +++ b/compiler/utils/assembler_test_base.h @@ -142,8 +142,15 @@ class AssemblerTestBase : public testing::Test { InstructionSet isa = GetIsa(); switch (isa) { case InstructionSet::kRiscv64: - // TODO: Support compression (RV32C) in assembler and tests (add `c` to `-march=`). - return {FindTool("clang"), "--compile", "-target", "riscv64-linux-gnu", "-march=rv64imafd"}; + // TODO(riscv64): Support compression (RV32C) in assembler and tests (add `c` to `-march=`). + return {FindTool("clang"), + "--compile", + "-target", + "riscv64-linux-gnu", + "-march=rv64imafd", + // Force the assembler to fully emit branch instructions instead of leaving + // offsets unresolved with relocation information for the linker. + "-mno-relax"}; case InstructionSet::kX86: return {FindTool("clang"), "--compile", "-target", "i386-linux-gnu"}; case InstructionSet::kX86_64: @@ -167,6 +174,7 @@ class AssemblerTestBase : public testing::Test { "--disassemble", "--no-print-imm-hex", "--no-show-raw-insn", + "--mattr=+F,+D", // Disassemble "F" and "D" Standard Extensions. "-M", "no-aliases"}; default: diff --git a/compiler/utils/label.h b/compiler/utils/label.h index 0368d90a26..25bf01376b 100644 --- a/compiler/utils/label.h +++ b/compiler/utils/label.h @@ -31,6 +31,10 @@ class AssemblerFixup; namespace arm64 { class Arm64Assembler; } // namespace arm64 +namespace riscv64 { +class Riscv64Assembler; +class Riscv64Label; +} // namespace riscv64 namespace x86 { class X86Assembler; class NearLabel; @@ -109,6 +113,8 @@ class Label { } friend class arm64::Arm64Assembler; + friend class riscv64::Riscv64Assembler; + friend class riscv64::Riscv64Label; friend class x86::X86Assembler; friend class x86::NearLabel; friend class x86_64::X86_64Assembler; diff --git a/compiler/utils/riscv64/assembler_riscv64.cc b/compiler/utils/riscv64/assembler_riscv64.cc index fd6fae35fc..80b97a3b4c 100644 --- a/compiler/utils/riscv64/assembler_riscv64.cc +++ b/compiler/utils/riscv64/assembler_riscv64.cc @@ -27,16 +27,45 @@ static_assert(static_cast<size_t>(kRiscv64PointerSize) == kRiscv64DoublewordSize "Unexpected Riscv64 pointer size."); static_assert(kRiscv64PointerSize == PointerSize::k64, "Unexpected Riscv64 pointer size."); +// Split 32-bit offset into an `imm20` for LUI/AUIPC and +// a signed 12-bit short offset for ADDI/JALR/etc. +ALWAYS_INLINE static inline std::pair<uint32_t, int32_t> SplitOffset(int32_t offset) { + // The highest 0x800 values are out of range. + DCHECK_LT(offset, 0x7ffff800); + // Round `offset` to nearest 4KiB offset because short offset has range [-0x800, 0x800). + int32_t near_offset = (offset + 0x800) & ~0xfff; + // Calculate the short offset. + int32_t short_offset = offset - near_offset; + DCHECK(IsInt<12>(short_offset)); + // Extract the `imm20`. + uint32_t imm20 = static_cast<uint32_t>(near_offset) >> 12; + // Return the result as a pair. 
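+ // For example, offset = 0x1234ABC rounds to near_offset = 0x1235000, so
+ // imm20 = 0x1235 and short_offset = -0x544: AUIPC materializes pc + 0x1235000
+ // and the paired instruction's 12-bit immediate adds -0x544 to reach the target.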
+ return std::make_pair(imm20, short_offset); +} + void Riscv64Assembler::FinalizeCode() { + ReserveJumpTableSpace(); + EmitLiterals(); + PromoteBranches(); } void Riscv64Assembler::FinalizeInstructions(const MemoryRegion& region) { + EmitBranches(); + EmitJumpTables(); Assembler::FinalizeInstructions(region); + PatchCFI(); } void Riscv64Assembler::Emit(uint32_t value) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - buffer_.Emit<uint32_t>(value); + if (overwriting_) { + // Branches to labels are emitted into their placeholders here. + buffer_.Store<uint32_t>(overwrite_location_, value); + overwrite_location_ += sizeof(uint32_t); + } else { + // Other instructions are simply appended at the end here. + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + buffer_.Emit<uint32_t>(value); + } } /////////////////////////////// RV64 VARIANTS extension /////////////////////////////// @@ -653,11 +682,11 @@ void Riscv64Assembler::FAbsS(FRegister rd, FRegister rs) { FSgnjxS(rd, rs, rs); void Riscv64Assembler::FNegS(FRegister rd, FRegister rs) { FSgnjnS(rd, rs, rs); } -void Riscv64Assembler::FMvD(FRegister rd, FRegister rs) { FSgnjS(rd, rs, rs); } +void Riscv64Assembler::FMvD(FRegister rd, FRegister rs) { FSgnjD(rd, rs, rs); } -void Riscv64Assembler::FAbsD(FRegister rd, FRegister rs) { FSgnjxS(rd, rs, rs); } +void Riscv64Assembler::FAbsD(FRegister rd, FRegister rs) { FSgnjxD(rd, rs, rs); } -void Riscv64Assembler::FNegD(FRegister rd, FRegister rs) { FSgnjnS(rd, rs, rs); } +void Riscv64Assembler::FNegD(FRegister rd, FRegister rs) { FSgnjnD(rd, rs, rs); } void Riscv64Assembler::Beqz(XRegister rs, int32_t offset) { Beq(rs, Zero, offset); @@ -711,8 +740,874 @@ void Riscv64Assembler::Jalr(XRegister rd, XRegister rs) { Jalr(rd, rs, 0); } void Riscv64Assembler::Ret() { Jalr(Zero, RA, 0); } +void Riscv64Assembler::Beqz(XRegister rs, Riscv64Label* label, bool is_bare) { + Beq(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bnez(XRegister rs, Riscv64Label* label, bool is_bare) { + Bne(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Blez(XRegister rs, Riscv64Label* label, bool is_bare) { + Ble(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bgez(XRegister rs, Riscv64Label* label, bool is_bare) { + Bge(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bltz(XRegister rs, Riscv64Label* label, bool is_bare) { + Blt(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bgtz(XRegister rs, Riscv64Label* label, bool is_bare) { + Bgt(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondEQ, rs, rt); +} + +void Riscv64Assembler::Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondNE, rs, rt); +} + +void Riscv64Assembler::Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLE, rs, rt); +} + +void Riscv64Assembler::Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGE, rs, rt); +} + +void Riscv64Assembler::Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLT, rs, rt); +} + +void Riscv64Assembler::Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGT, rs, rt); +} + +void Riscv64Assembler::Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLEU, rs, rt); +} + +void Riscv64Assembler::Bgeu(XRegister rs, XRegister 
rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGEU, rs, rt); +} + +void Riscv64Assembler::Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLTU, rs, rt); +} + +void Riscv64Assembler::Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGTU, rs, rt); +} + +void Riscv64Assembler::Jal(XRegister rd, Riscv64Label* label, bool is_bare) { + Buncond(label, rd, is_bare); +} + +void Riscv64Assembler::J(Riscv64Label* label, bool is_bare) { + Jal(Zero, label, is_bare); +} + +void Riscv64Assembler::Jal(Riscv64Label* label, bool is_bare) { + Jal(RA, label, is_bare); +} + +void Riscv64Assembler::Lw(XRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + LoadLiteral(literal, rd, Branch::kLiteral); +} + +void Riscv64Assembler::Lwu(XRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + LoadLiteral(literal, rd, Branch::kLiteralUnsigned); +} + +void Riscv64Assembler::Ld(XRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 8u); + LoadLiteral(literal, rd, Branch::kLiteralLong); +} + /////////////////////////////// RV64 MACRO Instructions END /////////////////////////////// +const Riscv64Assembler::Branch::BranchInfo Riscv64Assembler::Branch::branch_info_[] = { + // Short branches (can be promoted to longer). + {4, 0, Riscv64Assembler::Branch::kOffset13}, // kCondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kUncondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kCall + // Short branches (can't be promoted to longer). + {4, 0, Riscv64Assembler::Branch::kOffset13}, // kBareCondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kBareUncondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kBareCall + + // Medium branch. + {8, 4, Riscv64Assembler::Branch::kOffset21}, // kCondBranch21 + + // Long branches. + {12, 4, Riscv64Assembler::Branch::kOffset32}, // kLongCondBranch + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLongUncondBranch + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLongCall + + // label. + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLabel + + // literals. 
+ {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteral + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralUnsigned + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralLong +}; + +void Riscv64Assembler::Branch::InitShortOrLong(Riscv64Assembler::Branch::OffsetBits offset_size, + Riscv64Assembler::Branch::Type short_type, + Riscv64Assembler::Branch::Type long_type, + Riscv64Assembler::Branch::Type longest_type) { + Riscv64Assembler::Branch::Type type = short_type; + if (offset_size > branch_info_[type].offset_size) { + type = long_type; + if (offset_size > branch_info_[type].offset_size) { + type = longest_type; + } + } + type_ = type; +} + +void Riscv64Assembler::Branch::InitializeType(Type initial_type) { + OffsetBits offset_size_needed = GetOffsetSizeNeeded(location_, target_); + + switch (initial_type) { + case kCondBranch: + if (condition_ != kUncond) { + InitShortOrLong(offset_size_needed, kCondBranch, kCondBranch21, kLongCondBranch); + break; + } + FALLTHROUGH_INTENDED; + case kUncondBranch: + InitShortOrLong(offset_size_needed, kUncondBranch, kLongUncondBranch, kLongUncondBranch); + break; + case kCall: + InitShortOrLong(offset_size_needed, kCall, kLongCall, kLongCall); + break; + case kBareCondBranch: + if (condition_ != kUncond) { + type_ = kBareCondBranch; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + } + FALLTHROUGH_INTENDED; + case kBareUncondBranch: + type_ = kBareUncondBranch; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + case kBareCall: + type_ = kBareCall; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + case kLabel: + type_ = initial_type; + break; + case kLiteral: + case kLiteralUnsigned: + case kLiteralLong: + CHECK(!IsResolved()); + type_ = initial_type; + break; + default: + LOG(FATAL) << "Unexpected branch type " << enum_cast<uint32_t>(initial_type); + UNREACHABLE(); + } + + old_type_ = type_; +} + +bool Riscv64Assembler::Branch::IsNop(BranchCondition condition, XRegister lhs, XRegister rhs) { + switch (condition) { + case kCondNE: + case kCondLT: + case kCondGT: + case kCondLTU: + case kCondGTU: + return lhs == rhs; + default: + return false; + } +} + +bool Riscv64Assembler::Branch::IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs) { + switch (condition) { + case kUncond: + return true; + case kCondEQ: + case kCondGE: + case kCondLE: + case kCondLEU: + case kCondGEU: + return lhs == rhs; + default: + return false; + } +} + +Riscv64Assembler::Branch::Branch(uint32_t location, uint32_t target, XRegister rd, bool is_bare) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(rd), + rhs_reg_(Zero), + condition_(kUncond) { + InitializeType( + (rd != Zero ? (is_bare ? kBareCall : kCall) : (is_bare ? kBareUncondBranch : kUncondBranch))); +} + +Riscv64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + Riscv64Assembler::BranchCondition condition, + XRegister lhs_reg, + XRegister rhs_reg, + bool is_bare) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(lhs_reg), + rhs_reg_(rhs_reg), + condition_(condition) { + DCHECK_NE(condition, kUncond); + DCHECK(!IsNop(condition, lhs_reg, rhs_reg)); + DCHECK(!IsUncond(condition, lhs_reg, rhs_reg)); + InitializeType(is_bare ? 
kBareCondBranch : kCondBranch); +} + +Riscv64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + XRegister rd, + Type label_or_literal_type) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(rd), + rhs_reg_(Zero), + condition_(kUncond) { + CHECK_NE(rd , Zero); + InitializeType(label_or_literal_type); +} + +Riscv64Assembler::BranchCondition Riscv64Assembler::Branch::OppositeCondition( + Riscv64Assembler::BranchCondition cond) { + switch (cond) { + case kCondEQ: + return kCondNE; + case kCondNE: + return kCondEQ; + case kCondLT: + return kCondGE; + case kCondGE: + return kCondLT; + case kCondLE: + return kCondGT; + case kCondGT: + return kCondLE; + case kCondLTU: + return kCondGEU; + case kCondGEU: + return kCondLTU; + case kCondLEU: + return kCondGTU; + case kCondGTU: + return kCondLEU; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << enum_cast<uint32_t>(cond); + UNREACHABLE(); + } +} + +Riscv64Assembler::Branch::Type Riscv64Assembler::Branch::GetType() const { return type_; } + +Riscv64Assembler::BranchCondition Riscv64Assembler::Branch::GetCondition() const { + return condition_; +} + +XRegister Riscv64Assembler::Branch::GetLeftRegister() const { return lhs_reg_; } + +XRegister Riscv64Assembler::Branch::GetRightRegister() const { return rhs_reg_; } + +uint32_t Riscv64Assembler::Branch::GetTarget() const { return target_; } + +uint32_t Riscv64Assembler::Branch::GetLocation() const { return location_; } + +uint32_t Riscv64Assembler::Branch::GetOldLocation() const { return old_location_; } + +uint32_t Riscv64Assembler::Branch::GetLength() const { return branch_info_[type_].length; } + +uint32_t Riscv64Assembler::Branch::GetOldLength() const { return branch_info_[old_type_].length; } + +uint32_t Riscv64Assembler::Branch::GetEndLocation() const { return GetLocation() + GetLength(); } + +uint32_t Riscv64Assembler::Branch::GetOldEndLocation() const { + return GetOldLocation() + GetOldLength(); +} + +bool Riscv64Assembler::Branch::IsBare() const { + switch (type_) { + case kBareUncondBranch: + case kBareCondBranch: + case kBareCall: + return true; + default: + return false; + } +} + +bool Riscv64Assembler::Branch::IsResolved() const { return target_ != kUnresolved; } + +Riscv64Assembler::Branch::OffsetBits Riscv64Assembler::Branch::GetOffsetSize() const { + return branch_info_[type_].offset_size; +} + +Riscv64Assembler::Branch::OffsetBits Riscv64Assembler::Branch::GetOffsetSizeNeeded( + uint32_t location, uint32_t target) { + // For unresolved targets assume the shortest encoding + // (later it will be made longer if needed). + if (target == kUnresolved) { + return kOffset13; + } + int64_t distance = static_cast<int64_t>(target) - location; + if (IsInt<kOffset13>(distance)) { + return kOffset13; + } else if (IsInt<kOffset21>(distance)) { + return kOffset21; + } else { + return kOffset32; + } +} + +void Riscv64Assembler::Branch::Resolve(uint32_t target) { target_ = target; } + +void Riscv64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) { + // All targets should be resolved before we start promoting branches. + DCHECK(IsResolved()); + if (location_ > expand_location) { + location_ += delta; + } + if (target_ > expand_location) { + target_ += delta; + } +} + +uint32_t Riscv64Assembler::Branch::PromoteIfNeeded() { + // All targets should be resolved before we start promoting branches. + DCHECK(IsResolved()); + Type old_type = type_; + switch (type_) { + // Short branches (can be promoted to longer). 
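+ // For example, a kCondBranch whose target moves out of the +/-4KiB range of its
+ // 13-bit offset becomes an 8-byte kCondBranch21; if the target also leaves the
+ // +/-1MiB range of a 21-bit offset, it becomes a 12-byte kLongCondBranch. The
+ // returned size delta lets the caller relocate all other branches.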
+ case kCondBranch: { + OffsetBits needed_size = GetOffsetSizeNeeded(GetOffsetLocation(), target_); + if (needed_size <= GetOffsetSize()) { + return 0u; + } + // The offset remains the same for `kCondBranch21` for forward branches. + DCHECK_EQ(branch_info_[kCondBranch21].length - branch_info_[kCondBranch21].pc_offset, + branch_info_[kCondBranch].length - branch_info_[kCondBranch].pc_offset); + if (target_ <= location_) { + // Calculate the needed size for kCondBranch21. + needed_size = + GetOffsetSizeNeeded(location_ + branch_info_[kCondBranch21].pc_offset, target_); + } + type_ = (needed_size <= branch_info_[kCondBranch21].offset_size) + ? kCondBranch21 + : kLongCondBranch; + break; + } + case kUncondBranch: + if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) { + return 0u; + } + type_ = kLongUncondBranch; + break; + case kCall: + if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) { + return 0u; + } + type_ = kLongCall; + break; + // Medium branch (can be promoted to long). + case kCondBranch21: + if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) { + return 0u; + } + type_ = kLongCondBranch; + break; + default: + // Other branch types cannot be promoted. + DCHECK_LE(GetOffsetSizeNeeded(GetOffsetLocation(), target_), GetOffsetSize()) << type_; + return 0u; + } + DCHECK(type_ != old_type); + DCHECK_GT(branch_info_[type_].length, branch_info_[old_type].length); + return branch_info_[type_].length - branch_info_[old_type].length; +} + +uint32_t Riscv64Assembler::Branch::GetOffsetLocation() const { + return location_ + branch_info_[type_].pc_offset; +} + +int32_t Riscv64Assembler::Branch::GetOffset() const { + CHECK(IsResolved()); + // Calculate the byte distance between instructions and also account for + // different PC-relative origins. + uint32_t offset_location = GetOffsetLocation(); + int32_t offset = static_cast<int32_t>(target_ - offset_location); + DCHECK_EQ(offset, static_cast<int64_t>(target_) - static_cast<int64_t>(offset_location)); + return offset; +} + +void Riscv64Assembler::EmitBcond(BranchCondition cond, + XRegister rs, + XRegister rt, + int32_t offset) { + switch (cond) { +#define DEFINE_CASE(COND, cond) \ + case kCond##COND: \ + B##cond(rs, rt, offset); \ + break; + DEFINE_CASE(EQ, eq) + DEFINE_CASE(NE, ne) + DEFINE_CASE(LT, lt) + DEFINE_CASE(GE, ge) + DEFINE_CASE(LE, le) + DEFINE_CASE(GT, gt) + DEFINE_CASE(LTU, ltu) + DEFINE_CASE(GEU, geu) + DEFINE_CASE(LEU, leu) + DEFINE_CASE(GTU, gtu) +#undef DEFINE_CASE + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << enum_cast<uint32_t>(cond); + UNREACHABLE(); + } +} + +void Riscv64Assembler::EmitBranch(Riscv64Assembler::Branch* branch) { + CHECK(overwriting_); + overwrite_location_ = branch->GetLocation(); + const int32_t offset = branch->GetOffset(); + BranchCondition condition = branch->GetCondition(); + XRegister lhs = branch->GetLeftRegister(); + XRegister rhs = branch->GetRightRegister(); + + auto emit_auipc_and_next = [&](XRegister reg, auto next) { + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + auto [imm20, short_offset] = SplitOffset(offset); + Auipc(reg, imm20); + next(short_offset); + }; + + switch (branch->GetType()) { + // Short branches. 
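+ // For reference, the sequences emitted for a conditional branch to `target` are:
+ //   kCondBranch:     beq rs, rt, target
+ //   kCondBranch21:   bne rs, rt, 2f; j target; 2:
+ //   kLongCondBranch: bne rs, rt, 2f; auipc tmp, hi20; jalr zero, lo12(tmp); 2: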
+ case Branch::kUncondBranch: + case Branch::kBareUncondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + J(offset); + break; + case Branch::kCondBranch: + case Branch::kBareCondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcond(condition, lhs, rhs, offset); + break; + case Branch::kCall: + case Branch::kBareCall: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + DCHECK(lhs != Zero); + Jal(lhs, offset); + break; + + // Medium branch. + case Branch::kCondBranch21: + EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, branch->GetLength()); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + J(offset); + break; + + // Long branches. + case Branch::kLongCondBranch: + EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, branch->GetLength()); + FALLTHROUGH_INTENDED; + case Branch::kLongUncondBranch: + emit_auipc_and_next(TMP, [&](int32_t short_offset) { Jalr(Zero, TMP, short_offset); }); + break; + case Branch::kLongCall: + DCHECK(lhs != Zero); + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Jalr(lhs, lhs, short_offset); }); + break; + + // label. + case Branch::kLabel: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Addi(lhs, lhs, short_offset); }); + break; + // literals. + case Branch::kLiteral: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Lw(lhs, lhs, short_offset); }); + break; + case Branch::kLiteralUnsigned: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Lwu(lhs, lhs, short_offset); }); + break; + case Branch::kLiteralLong: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Ld(lhs, lhs, short_offset); }); + break; + } + CHECK_EQ(overwrite_location_, branch->GetEndLocation()); + CHECK_LE(branch->GetLength(), static_cast<uint32_t>(Branch::kMaxBranchLength)); +} + +void Riscv64Assembler::EmitBranches() { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (branch placeholders) in the buffer. + overwriting_ = true; + for (auto& branch : branches_) { + EmitBranch(&branch); + } + overwriting_ = false; +} + +void Riscv64Assembler::FinalizeLabeledBranch(Riscv64Label* label) { + // TODO(riscv64): Support "C" Standard Extension - length may not be a multiple of 4. + DCHECK_ALIGNED(branches_.back().GetLength(), sizeof(uint32_t)); + uint32_t length = branches_.back().GetLength() / sizeof(uint32_t); + if (!label->IsBound()) { + // Branch forward (to a following label), distance is unknown. + // The first branch forward will contain 0, serving as the terminator of + // the list of forward-reaching branches. + Emit(label->position_); + length--; + // Now make the label object point to this branch + // (this forms a linked list of branches preceding this label). + uint32_t branch_id = branches_.size() - 1; + label->LinkTo(branch_id); + } + // Reserve space for the branch. + for (; length != 0u; --length) { + Nop(); + } +} + +void Riscv64Assembler::Bcond( + Riscv64Label* label, bool is_bare, BranchCondition condition, XRegister lhs, XRegister rhs) { + // TODO(riscv64): Should an assembler perform these optimizations, or should we remove them? + // If lhs = rhs, this can be a NOP. + if (Branch::IsNop(condition, lhs, rhs)) { + return; + } + if (Branch::IsUncond(condition, lhs, rhs)) { + Buncond(label, Zero, is_bare); + return; + } + + uint32_t target = label->IsBound() ? 
GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs, is_bare); + FinalizeLabeledBranch(label); +} + +void Riscv64Assembler::Buncond(Riscv64Label* label, XRegister rd, bool is_bare) { + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, rd, is_bare); + FinalizeLabeledBranch(label); +} + +void Riscv64Assembler::LoadLiteral(Literal* literal, XRegister rd, Branch::Type literal_type) { + Riscv64Label* label = literal->GetLabel(); + DCHECK(!label->IsBound()); + branches_.emplace_back(buffer_.Size(), Branch::kUnresolved, rd, literal_type); + FinalizeLabeledBranch(label); +} + +Riscv64Assembler::Branch* Riscv64Assembler::GetBranch(uint32_t branch_id) { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +const Riscv64Assembler::Branch* Riscv64Assembler::GetBranch(uint32_t branch_id) const { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +void Riscv64Assembler::Bind(Riscv64Label* label) { + CHECK(!label->IsBound()); + uint32_t bound_pc = buffer_.Size(); + + // Walk the list of branches referring to and preceding this label. + // Store the previously unknown target addresses in them. + while (label->IsLinked()) { + uint32_t branch_id = label->Position(); + Branch* branch = GetBranch(branch_id); + branch->Resolve(bound_pc); + + uint32_t branch_location = branch->GetLocation(); + // Extract the location of the previous branch in the list (walking the list backwards; + // the previous branch ID was stored in the space reserved for this branch). + uint32_t prev = buffer_.Load<uint32_t>(branch_location); + + // On to the previous branch in the list... + label->position_ = prev; + } + + // Now make the label object contain its own location (relative to the end of the preceding + // branch, if any; it will be used by the branches referring to and following this label). + uint32_t prev_branch_id = Riscv64Label::kNoPrevBranchId; + if (!branches_.empty()) { + prev_branch_id = branches_.size() - 1u; + const Branch* prev_branch = GetBranch(prev_branch_id); + bound_pc -= prev_branch->GetEndLocation(); + } + label->prev_branch_id_ = prev_branch_id; + label->BindTo(bound_pc); +} + +void Riscv64Assembler::LoadLabelAddress(XRegister rd, Riscv64Label* label) { + DCHECK_NE(rd, Zero); + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, rd, Branch::kLabel); + FinalizeLabeledBranch(label); +} + +Literal* Riscv64Assembler::NewLiteral(size_t size, const uint8_t* data) { + // We don't support byte and half-word literals. + if (size == 4u) { + literals_.emplace_back(size, data); + return &literals_.back(); + } else { + DCHECK_EQ(size, 8u); + long_literals_.emplace_back(size, data); + return &long_literals_.back(); + } +} + +JumpTable* Riscv64Assembler::CreateJumpTable(ArenaVector<Riscv64Label*>&& labels) { + jump_tables_.emplace_back(std::move(labels)); + JumpTable* table = &jump_tables_.back(); + DCHECK(!table->GetLabel()->IsBound()); + return table; +} + +uint32_t Riscv64Assembler::GetLabelLocation(const Riscv64Label* label) const { + CHECK(label->IsBound()); + uint32_t target = label->Position(); + if (label->prev_branch_id_ != Riscv64Label::kNoPrevBranchId) { + // Get label location based on the branch preceding it. 
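+ // Example: if this label was bound 12 bytes past the end of the preceding branch,
+ // `Position()` holds 12 and the final location is GetEndLocation() + 12, which
+ // remains correct even after earlier branches grow during promotion.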
+ const Branch* prev_branch = GetBranch(label->prev_branch_id_); + target += prev_branch->GetEndLocation(); + } + return target; +} + +uint32_t Riscv64Assembler::GetAdjustedPosition(uint32_t old_position) { + // We can reconstruct the adjustment by going through all the branches from the beginning + // up to the `old_position`. Since we expect `GetAdjustedPosition()` to be called in a loop + // with increasing `old_position`, we can use the data from last `GetAdjustedPosition()` to + // continue where we left off and the whole loop should be O(m+n) where m is the number + // of positions to adjust and n is the number of branches. + if (old_position < last_old_position_) { + last_position_adjustment_ = 0; + last_old_position_ = 0; + last_branch_id_ = 0; + } + while (last_branch_id_ != branches_.size()) { + const Branch* branch = GetBranch(last_branch_id_); + if (branch->GetLocation() >= old_position + last_position_adjustment_) { + break; + } + last_position_adjustment_ += branch->GetLength() - branch->GetOldLength(); + ++last_branch_id_; + } + last_old_position_ = old_position; + return old_position + last_position_adjustment_; +} + +void Riscv64Assembler::ReserveJumpTableSpace() { + if (!jump_tables_.empty()) { + for (JumpTable& table : jump_tables_) { + Riscv64Label* label = table.GetLabel(); + Bind(label); + + // Bulk ensure capacity, as this may be large. + size_t orig_size = buffer_.Size(); + size_t required_capacity = orig_size + table.GetSize(); + if (required_capacity > buffer_.Capacity()) { + buffer_.ExtendCapacity(required_capacity); + } +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = true; +#endif + + // Fill the space with placeholder data as the data is not final + // until the branches have been promoted. And we shouldn't + // be moving uninitialized data during branch promotion. + for (size_t cnt = table.GetData().size(), i = 0; i < cnt; ++i) { + buffer_.Emit<uint32_t>(0x1abe1234u); + } + +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = false; +#endif + } + } +} + +void Riscv64Assembler::PromoteBranches() { + // Promote short branches to long as necessary. + bool changed; + do { + changed = false; + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + uint32_t delta = branch.PromoteIfNeeded(); + // If this branch has been promoted and needs to expand in size, + // relocate all branches by the expansion size. + if (delta != 0u) { + changed = true; + uint32_t expand_location = branch.GetLocation(); + for (auto& branch2 : branches_) { + branch2.Relocate(expand_location, delta); + } + } + } + } while (changed); + + // Account for branch expansion by resizing the code buffer + // and moving the code in it to its final location. + size_t branch_count = branches_.size(); + if (branch_count > 0) { + // Resize. + Branch& last_branch = branches_[branch_count - 1]; + uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation(); + uint32_t old_size = buffer_.Size(); + buffer_.Resize(old_size + size_delta); + // Move the code residing between branch placeholders. + uint32_t end = old_size; + for (size_t i = branch_count; i > 0;) { + Branch& branch = branches_[--i]; + uint32_t size = end - branch.GetOldEndLocation(); + buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size); + end = branch.GetOldLocation(); + } + } + + // Align 64-bit literals by moving them up by 4 bytes if needed. 
+ // This can increase the PC-relative distance but all literals are accessed with AUIPC+Load(imm12) + // without branch promotion, so this late adjustment cannot take them out of instruction range. + if (!long_literals_.empty()) { + uint32_t first_literal_location = GetLabelLocation(long_literals_.front().GetLabel()); + size_t lit_size = long_literals_.size() * sizeof(uint64_t); + size_t buf_size = buffer_.Size(); + // 64-bit literals must be at the very end of the buffer. + CHECK_EQ(first_literal_location + lit_size, buf_size); + if (!IsAligned<sizeof(uint64_t)>(first_literal_location)) { + // Insert the padding. + buffer_.Resize(buf_size + sizeof(uint32_t)); + buffer_.Move(first_literal_location + sizeof(uint32_t), first_literal_location, lit_size); + DCHECK(!overwriting_); + overwriting_ = true; + overwrite_location_ = first_literal_location; + Emit(0); // Illegal instruction. + overwriting_ = false; + // Increase target addresses in literal and address loads by 4 bytes in order for correct + // offsets from PC to be generated. + for (auto& branch : branches_) { + uint32_t target = branch.GetTarget(); + if (target >= first_literal_location) { + branch.Resolve(target + sizeof(uint32_t)); + } + } + // If after this we ever call GetLabelLocation() to get the location of a 64-bit literal, + // we need to adjust the location of the literal's label as well. + for (Literal& literal : long_literals_) { + // Bound label's position is negative, hence decrementing it instead of incrementing. + literal.GetLabel()->position_ -= sizeof(uint32_t); + } + } + } +} + +void Riscv64Assembler::PatchCFI() { + if (cfi().NumberOfDelayedAdvancePCs() == 0u) { + return; + } + + using DelayedAdvancePC = DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC; + const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); + const std::vector<uint8_t>& old_stream = data.first; + const std::vector<DelayedAdvancePC>& advances = data.second; + + // Refill our data buffer with patched opcodes. + static constexpr size_t kExtraSpace = 16; // Not every PC advance can be encoded in one byte. + cfi().ReserveCFIStream(old_stream.size() + advances.size() + kExtraSpace); + size_t stream_pos = 0; + for (const DelayedAdvancePC& advance : advances) { + DCHECK_GE(advance.stream_pos, stream_pos); + // Copy old data up to the point where advance was issued. + cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos); + stream_pos = advance.stream_pos; + // Insert the advance command with its final offset. + size_t final_pc = GetAdjustedPosition(advance.pc); + cfi().AdvancePC(final_pc); + } + // Copy the final segment if any. + cfi().AppendRawData(old_stream, stream_pos, old_stream.size()); +} + +void Riscv64Assembler::EmitJumpTables() { + if (!jump_tables_.empty()) { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (here, jump tables) in the buffer. + overwriting_ = true; + + for (JumpTable& table : jump_tables_) { + Riscv64Label* table_label = table.GetLabel(); + uint32_t start = GetLabelLocation(table_label); + overwrite_location_ = start; + + for (Riscv64Label* target : table.GetData()) { + CHECK_EQ(buffer_.Load<uint32_t>(overwrite_location_), 0x1abe1234u); + // The table will contain target addresses relative to the table start. 
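+ // An illustrative consumer (not part of this patch) could dispatch through it as:
+ //   LoadLabelAddress(tbl, table->GetLabel());   // tbl = table base
+ //   lw off, (tbl + 4 * index); add tbl, tbl, off; jr tbl
+ // i.e. load the relative entry and add it back to the table base.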
+ uint32_t offset = GetLabelLocation(target) - start; + Emit(offset); + } + } + + overwriting_ = false; + } +} + +void Riscv64Assembler::EmitLiterals() { + if (!literals_.empty()) { + for (Literal& literal : literals_) { + Riscv64Label* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK_EQ(literal.GetSize(), 4u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } + if (!long_literals_.empty()) { + // These need to be 8-byte-aligned but we shall add the alignment padding after the branch + // promotion, if needed. Since all literals are accessed with AUIPC+Load(imm12) without branch + // promotion, this late adjustment cannot take long literals out of instruction range. + for (Literal& literal : long_literals_) { + Riscv64Label* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK_EQ(literal.GetSize(), 8u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } +} + /////////////////////////////// RV64 VARIANTS extension end //////////// } // namespace riscv64 diff --git a/compiler/utils/riscv64/assembler_riscv64.h b/compiler/utils/riscv64/assembler_riscv64.h index 0d781231ba..1071add881 100644 --- a/compiler/utils/riscv64/assembler_riscv64.h +++ b/compiler/utils/riscv64/assembler_riscv64.h @@ -41,7 +41,11 @@ enum class FPRoundingMode : uint32_t { kRUP = 0x3, // Round Up (towards +Infinity) kRMM = 0x4, // Round to Nearest, ties to Max Magnitude kDYN = 0x7, // Dynamic rounding mode - kDefault = kDYN + kDefault = kDYN, + // Some instructions never need to round even though the spec includes the RM field. + // To simplify testing, emit the RM as 0 by default for these instructions because that's what + // `clang` does and because the `llvm-objdump` fails to disassemble the other rounding modes. + kIgnored = 0 }; static constexpr size_t kRiscv64HalfwordSize = 2; @@ -49,17 +53,97 @@ static constexpr size_t kRiscv64WordSize = 4; static constexpr size_t kRiscv64DoublewordSize = 8; class Riscv64Label : public Label { + public: + Riscv64Label() : prev_branch_id_(kNoPrevBranchId) {} + + Riscv64Label(Riscv64Label&& src) + : Label(std::move(src)), prev_branch_id_(src.prev_branch_id_) {} + + private: + static constexpr uint32_t kNoPrevBranchId = std::numeric_limits<uint32_t>::max(); + + uint32_t prev_branch_id_; // To get distance from preceding branch, if any. + + friend class Riscv64Assembler; + DISALLOW_COPY_AND_ASSIGN(Riscv64Label); +}; + +// Assembler literal is a value embedded in code, retrieved using a PC-relative load. +class Literal { + public: + static constexpr size_t kMaxSize = 8; + + Literal(uint32_t size, const uint8_t* data) : label_(), size_(size) { + DCHECK_LE(size, Literal::kMaxSize); + memcpy(data_, data, size); + } + + template <typename T> + T GetValue() const { + DCHECK_EQ(size_, sizeof(T)); + T value; + memcpy(&value, data_, sizeof(T)); + return value; + } + + uint32_t GetSize() const { return size_; } + + const uint8_t* GetData() const { return data_; } + + Riscv64Label* GetLabel() { return &label_; } + + const Riscv64Label* GetLabel() const { return &label_; } + + private: + Riscv64Label label_; + const uint32_t size_; + uint8_t data_[kMaxSize]; + + DISALLOW_COPY_AND_ASSIGN(Literal); +}; + +// Jump table: table of labels emitted after the code and before the literals. Similar to literals. 
+class JumpTable { + public: + explicit JumpTable(ArenaVector<Riscv64Label*>&& labels) : label_(), labels_(std::move(labels)) {} + + size_t GetSize() const { return labels_.size() * sizeof(int32_t); } + + const ArenaVector<Riscv64Label*>& GetData() const { return labels_; } + + Riscv64Label* GetLabel() { return &label_; } + + const Riscv64Label* GetLabel() const { return &label_; } + + private: + Riscv64Label label_; + ArenaVector<Riscv64Label*> labels_; + + DISALLOW_COPY_AND_ASSIGN(JumpTable); }; class Riscv64Assembler final : public Assembler { public: explicit Riscv64Assembler(ArenaAllocator* allocator, const Riscv64InstructionSetFeatures* instruction_set_features = nullptr) - : Assembler(allocator) { + : Assembler(allocator), + branches_(allocator->Adapter(kArenaAllocAssembler)), + overwriting_(false), + overwrite_location_(0), + literals_(allocator->Adapter(kArenaAllocAssembler)), + long_literals_(allocator->Adapter(kArenaAllocAssembler)), + jump_tables_(allocator->Adapter(kArenaAllocAssembler)), + last_position_adjustment_(0), + last_old_position_(0), + last_branch_id_(0) { UNUSED(instruction_set_features); + cfi().DelayEmittingAdvancePCs(); } virtual ~Riscv64Assembler() { + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + } } size_t CodeSize() const override { return Assembler::CodeSize(); } @@ -254,7 +338,7 @@ class Riscv64Assembler final : public Assembler { FCvtSD(rd, rs1, FPRoundingMode::kDefault); } void FCvtDS(FRegister rd, FRegister rs1) { - FCvtDS(rd, rs1, FPRoundingMode::kDefault); + FCvtDS(rd, rs1, FPRoundingMode::kIgnored); } // FP compare instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b101000D @@ -293,9 +377,9 @@ class Riscv64Assembler final : public Assembler { void FCvtLuS(XRegister rd, FRegister rs1) { FCvtLuS(rd, rs1, FPRoundingMode::kDefault); } void FCvtLuD(XRegister rd, FRegister rs1) { FCvtLuD(rd, rs1, FPRoundingMode::kDefault); } void FCvtSW(FRegister rd, XRegister rs1) { FCvtSW(rd, rs1, FPRoundingMode::kDefault); } - void FCvtDW(FRegister rd, XRegister rs1) { FCvtDW(rd, rs1, FPRoundingMode::kDefault); } + void FCvtDW(FRegister rd, XRegister rs1) { FCvtDW(rd, rs1, FPRoundingMode::kIgnored); } void FCvtSWu(FRegister rd, XRegister rs1) { FCvtSWu(rd, rs1, FPRoundingMode::kDefault); } - void FCvtDWu(FRegister rd, XRegister rs1) { FCvtDWu(rd, rs1, FPRoundingMode::kDefault); } + void FCvtDWu(FRegister rd, XRegister rs1) { FCvtDWu(rd, rs1, FPRoundingMode::kIgnored); } void FCvtSL(FRegister rd, XRegister rs1) { FCvtSL(rd, rs1, FPRoundingMode::kDefault); } void FCvtDL(FRegister rd, XRegister rs1) { FCvtDL(rd, rs1, FPRoundingMode::kDefault); } void FCvtSLu(FRegister rd, XRegister rs1) { FCvtSLu(rd, rs1, FPRoundingMode::kDefault); } @@ -356,15 +440,62 @@ class Riscv64Assembler final : public Assembler { void Jalr(XRegister rd, XRegister rs); void Ret(); + // Jumps and branches to a label. 
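+ // These record a fixup against a `Riscv64Label` that is resolved in FinalizeCode().
+ // Passing `is_bare = true` requests the single-instruction form and CHECKs that the
+ // target stays in range instead of allowing the branch to be expanded.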
+ void Beqz(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bnez(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Blez(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bgez(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bltz(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bgtz(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bgeu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Jal(XRegister rd, Riscv64Label* label, bool is_bare = false); + void J(Riscv64Label* label, bool is_bare = false); + void Jal(Riscv64Label* label, bool is_bare = false); + + // Literal load. + void Lw(XRegister rd, Literal* literal); + void Lwu(XRegister rd, Literal* literal); + void Ld(XRegister rd, Literal* literal); + /////////////////////////////// RV64 MACRO Instructions END /////////////////////////////// - void Bind([[maybe_unused]] Label* label) override { - UNIMPLEMENTED(FATAL) << "TODO: Support branches."; - } + void Bind(Label* label) override { Bind(down_cast<Riscv64Label*>(label)); } + void Jump([[maybe_unused]] Label* label) override { UNIMPLEMENTED(FATAL) << "Do not use Jump for RISCV64"; } + void Bind(Riscv64Label* label); + + // Load label address using PC-relative loads. + void LoadLabelAddress(XRegister rd, Riscv64Label* label); + + // Create a new literal with a given value. + // NOTE:Use `Identity<>` to force the template parameter to be explicitly specified. + template <typename T> + Literal* NewLiteral(typename Identity<T>::type value) { + static_assert(std::is_integral<T>::value, "T must be an integral type."); + return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); + } + + // Create a new literal with the given data. + Literal* NewLiteral(size_t size, const uint8_t* data); + + // Create a jump table for the given labels that will be emitted when finalizing. + // When the table is emitted, offsets will be relative to the location of the table. + // The table location is determined by the location of its label (the label precedes + // the table data) and should be loaded using LoadLabelAddress(). + JumpTable* CreateJumpTable(ArenaVector<Riscv64Label*>&& labels); + public: // Emit data (e.g. encoded instruction or immediate) to the instruction stream. void Emit(uint32_t value); @@ -375,7 +506,197 @@ class Riscv64Assembler final : public Assembler { // Emit branches and finalize all instructions. void FinalizeInstructions(const MemoryRegion& region) override; + // Returns the current location of a label. + // + // This function must be used instead of `Riscv64Label::GetPosition()` + // which returns assembler's internal data instead of an actual location. 
+ // + // The location can change during branch fixup in `FinalizeCode()`. Before that, + // the location is not final and therefore not very useful to external users, + // so they should preferably retrieve the location only after `FinalizeCode()`. + uint32_t GetLabelLocation(const Riscv64Label* label) const; + + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position); + private: + enum BranchCondition : uint8_t { + kCondEQ, + kCondNE, + kCondLT, + kCondGE, + kCondLE, + kCondGT, + kCondLTU, + kCondGEU, + kCondLEU, + kCondGTU, + kUncond, + }; + + // Note that PC-relative literal loads are handled as pseudo branches because they need + // to be emitted after branch relocation to use correct offsets. + class Branch { + public: + enum Type : uint8_t { + // TODO(riscv64): Support 16-bit instructions ("C" Standard Extension). + + // Short branches (can be promoted to longer). + kCondBranch, + kUncondBranch, + kCall, + // Short branches (can't be promoted to longer). + // TODO(riscv64): Do we need these (untested) bare branches, or can we remove them? + kBareCondBranch, + kBareUncondBranch, + kBareCall, + + // Medium branch (can be promoted to long). + kCondBranch21, + + // Long branches. + kLongCondBranch, + kLongUncondBranch, + kLongCall, + + // Label. + kLabel, + + // Literals. + kLiteral, + kLiteralUnsigned, + kLiteralLong, + // TODO(riscv64): Add FP literals. + }; + + // Bit sizes of offsets defined as enums to minimize chance of typos. + enum OffsetBits { + kOffset13 = 13, + kOffset21 = 21, + kOffset32 = 32, + }; + + static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_ + static constexpr uint32_t kMaxBranchLength = 12; // In bytes. + + struct BranchInfo { + // Branch length in bytes. + uint32_t length; + // The offset in bytes of the PC used in the (only) PC-relative instruction from + // the start of the branch sequence. RISC-V always uses the address of the PC-relative + // instruction as the PC, so this is essentially the offset of that instruction. + uint32_t pc_offset; + // How large (in bits) a PC-relative offset can be for a given type of branch. + OffsetBits offset_size; + }; + static const BranchInfo branch_info_[/* Type */]; + + // Unconditional branch or call. + Branch(uint32_t location, uint32_t target, XRegister rd, bool is_bare); + // Conditional branch. + Branch(uint32_t location, + uint32_t target, + BranchCondition condition, + XRegister lhs_reg, + XRegister rhs_reg, + bool is_bare); + // Label address (in literal area) or literal. + Branch(uint32_t location, uint32_t target, XRegister rd, Type label_or_literal_type); + + // Some conditional branches with lhs = rhs are effectively NOPs, while some + // others are effectively unconditional. 
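+ // For example, `blt a0, a0, target` can never be taken while `bge a0, a0, target`
+ // is always taken, so the assembler elides the former and turns the latter into an
+ // unconditional branch.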
+ static bool IsNop(BranchCondition condition, XRegister lhs, XRegister rhs); + static bool IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs); + + static BranchCondition OppositeCondition(BranchCondition cond); + + Type GetType() const; + BranchCondition GetCondition() const; + XRegister GetLeftRegister() const; + XRegister GetRightRegister() const; + uint32_t GetTarget() const; + uint32_t GetLocation() const; + uint32_t GetOldLocation() const; + uint32_t GetLength() const; + uint32_t GetOldLength() const; + uint32_t GetEndLocation() const; + uint32_t GetOldEndLocation() const; + bool IsBare() const; + bool IsResolved() const; + + // Returns the bit size of the signed offset that the branch instruction can handle. + OffsetBits GetOffsetSize() const; + + // Calculates the distance between two byte locations in the assembler buffer and + // returns the number of bits needed to represent the distance as a signed integer. + static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target); + + // Resolve a branch when the target is known. + void Resolve(uint32_t target); + + // Relocate a branch by a given delta if needed due to expansion of this or another + // branch at a given location by this delta (just changes location_ and target_). + void Relocate(uint32_t expand_location, uint32_t delta); + + // If necessary, updates the type by promoting a short branch to a longer branch + // based on the branch location and target. Returns the amount (in bytes) by + // which the branch size has increased. + uint32_t PromoteIfNeeded(); + + // Returns the offset into assembler buffer that shall be used as the base PC for + // offset calculation. RISC-V always uses the address of the PC-relative instruction + // as the PC, so this is essentially the location of that instruction. + uint32_t GetOffsetLocation() const; + + // Calculates and returns the offset ready for encoding in the branch instruction(s). + int32_t GetOffset() const; + + private: + // Completes branch construction by determining and recording its type. + void InitializeType(Type initial_type); + // Helper for the above. + void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type, Type longest_type); + + uint32_t old_location_; // Offset into assembler buffer in bytes. + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + + XRegister lhs_reg_; // Left-hand side register in conditional branches or + // destination register in calls or literals. + XRegister rhs_reg_; // Right-hand side register in conditional branches. + BranchCondition condition_; // Condition for conditional branches. + + Type type_; // Current type of the branch. + Type old_type_; // Initial type of the branch. + }; + + // Branch and literal fixup. + + void EmitBcond(BranchCondition cond, XRegister rs, XRegister rt, int32_t offset); + void EmitBranch(Branch* branch); + void EmitBranches(); + void EmitJumpTables(); + void EmitLiterals(); + + void FinalizeLabeledBranch(Riscv64Label* label); + void Bcond(Riscv64Label* label, + bool is_bare, + BranchCondition condition, + XRegister lhs, + XRegister rhs); + void Buncond(Riscv64Label* label, XRegister rd, bool is_bare); + void LoadLiteral(Literal* literal, XRegister rd, Branch::Type literal_type); + + Branch* GetBranch(uint32_t branch_id); + const Branch* GetBranch(uint32_t branch_id) const; + + void ReserveJumpTableSpace(); + void PromoteBranches(); + void PatchCFI(); + + // Emit helpers. 
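+ // All RV64 base instructions are 32 bits wide; e.g. the I-type helper below packs
+ // imm[11:0] into bits 31:20, rs1 into 19:15, funct3 into 14:12, rd into 11:7 and
+ // the opcode into 6:0.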
+ template <typename Reg1, typename Reg2> void EmitI(int32_t imm12, Reg1 rs1, uint32_t funct3, Reg2 rd, uint32_t opcode) { DCHECK(IsInt<12>(imm12)) << imm12; @@ -491,6 +812,26 @@ class Riscv64Assembler final : public Assembler { Emit(encoding); } + ArenaVector<Branch> branches_; + + // Whether appending instructions at the end of the buffer or overwriting the existing ones. + bool overwriting_; + // The current overwrite location. + uint32_t overwrite_location_; + + // Use `std::deque<>` for literal labels to allow insertions at the end + // without invalidating pointers and references to existing elements. + ArenaDeque<Literal> literals_; + ArenaDeque<Literal> long_literals_; // 64-bit literals separated for alignment reasons. + + // Jump table list. + ArenaDeque<JumpTable> jump_tables_; + + // Data for `GetAdjustedPosition()`, see the description there. + uint32_t last_position_adjustment_; + uint32_t last_old_position_; + uint32_t last_branch_id_; + static constexpr uint32_t kXlen = 64; DISALLOW_COPY_AND_ASSIGN(Riscv64Assembler); diff --git a/compiler/utils/riscv64/assembler_riscv64_test.cc b/compiler/utils/riscv64/assembler_riscv64_test.cc index 20d607beff..be20faa169 100644 --- a/compiler/utils/riscv64/assembler_riscv64_test.cc +++ b/compiler/utils/riscv64/assembler_riscv64_test.cc @@ -26,6 +26,7 @@ #define __ GetAssembler()-> namespace art { +namespace riscv64 { struct RISCV64CpuRegisterCompare { bool operator()(const riscv64::XRegister& a, const riscv64::XRegister& b) const { return a < b; } @@ -178,6 +179,358 @@ class AssemblerRISCV64Test : public AssemblerTest<riscv64::Riscv64Assembler, uint32_t CreateImmediate(int64_t imm_value) override { return imm_value; } + template <typename Emit> + std::string RepeatInsn(size_t count, const std::string& insn, Emit&& emit) { + std::string result; + for (; count != 0u; --count) { + result += insn; + emit(); + } + return result; + } + + std::string EmitNops(size_t size) { + // TODO(riscv64): Support "C" Standard Extension. + DCHECK_ALIGNED(size, sizeof(uint32_t)); + const size_t num_nops = size / sizeof(uint32_t); + return RepeatInsn(num_nops, "nop\n", [&]() { __ Nop(); }); + } + + auto GetPrintBcond() { + return [](const std::string& cond, + [[maybe_unused]] const std::string& opposite_cond, + const std::string& args, + const std::string& target) { + return "b" + cond + args + ", " + target + "\n"; + }; + } + + auto GetPrintBcondOppositeAndJ(const std::string& skip_label) { + return [=]([[maybe_unused]] const std::string& cond, + const std::string& opposite_cond, + const std::string& args, + const std::string& target) { + return "b" + opposite_cond + args + ", " + skip_label + "f\n" + + "j " + target + "\n" + + skip_label + ":\n"; + }; + } + + auto GetPrintBcondOppositeAndTail(const std::string& skip_label, const std::string& base_label) { + return [=]([[maybe_unused]] const std::string& cond, + const std::string& opposite_cond, + const std::string& args, + const std::string& target) { + return "b" + opposite_cond + args + ", " + skip_label + "f\n" + + base_label + ":\n" + + "auipc t6, %pcrel_hi(" + target + ")\n" + + "jalr x0, %pcrel_lo(" + base_label + "b)(t6)\n" + + skip_label + ":\n"; + }; + } + + // Helper function for basic tests that all branch conditions map to the correct opcodes, + // whether with branch expansion (a conditional branch with opposite condition over an + // unconditional branch) or without. 
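+ // For example, an in-range `Beq(a0, a1, label)` is expected to disassemble as
+ // `beq a0, a1, label`, while an expanded one becomes `bne a0, a1, 2f; j label; 2:`
+ // (see GetPrintBcond() and GetPrintBcondOppositeAndJ() above).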
+ template <typename PrintBcond> + std::string EmitBcondForAllConditions(Riscv64Label* label, + const std::string& target, + PrintBcond&& print_bcond) { + XRegister rs = A0; + __ Beqz(rs, label); + __ Bnez(rs, label); + __ Blez(rs, label); + __ Bgez(rs, label); + __ Bltz(rs, label); + __ Bgtz(rs, label); + XRegister rt = A1; + __ Beq(rs, rt, label); + __ Bne(rs, rt, label); + __ Ble(rs, rt, label); + __ Bge(rs, rt, label); + __ Blt(rs, rt, label); + __ Bgt(rs, rt, label); + __ Bleu(rs, rt, label); + __ Bgeu(rs, rt, label); + __ Bltu(rs, rt, label); + __ Bgtu(rs, rt, label); + + return + print_bcond("eq", "ne", "z a0", target) + + print_bcond("ne", "eq", "z a0", target) + + print_bcond("le", "gt", "z a0", target) + + print_bcond("ge", "lt", "z a0", target) + + print_bcond("lt", "ge", "z a0", target) + + print_bcond("gt", "le", "z a0", target) + + print_bcond("eq", "ne", " a0, a1", target) + + print_bcond("ne", "eq", " a0, a1", target) + + print_bcond("le", "gt", " a0, a1", target) + + print_bcond("ge", "lt", " a0, a1", target) + + print_bcond("lt", "ge", " a0, a1", target) + + print_bcond("gt", "le", " a0, a1", target) + + print_bcond("leu", "gtu", " a0, a1", target) + + print_bcond("geu", "ltu", " a0, a1", target) + + print_bcond("ltu", "geu", " a0, a1", target) + + print_bcond("gtu", "leu", " a0, a1", target); + } + + // Test Bcond for forward branches with all conditions. + // The gap must be such that either all branches expand, or none does. + template <typename PrintBcond> + void TestBcondForward(const std::string& test_name, + size_t gap_size, + const std::string& target_label, + PrintBcond&& print_bcond) { + std::string expected; + Riscv64Label label; + expected += EmitBcondForAllConditions(&label, target_label + "f", print_bcond); + expected += EmitNops(gap_size); + __ Bind(&label); + expected += target_label + ":\n"; + DriverStr(expected, test_name); + } + + // Test Bcond for backward branches with all conditions. + // The gap must be such that either all branches expand, or none does. + template <typename PrintBcond> + void TestBcondBackward(const std::string& test_name, + size_t gap_size, + const std::string& target_label, + PrintBcond&& print_bcond) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += target_label + ":\n"; + expected += EmitNops(gap_size); + expected += EmitBcondForAllConditions(&label, target_label + "b", print_bcond); + DriverStr(expected, test_name); + } + + size_t MaxOffset13BackwardDistance() { + return 4 * KB; + } + + size_t MaxOffset13ForwardDistance() { + // TODO(riscv64): Support "C" Standard Extension, max forward distance 4KiB - 2. + return 4 * KB - 4; + } + + size_t MaxOffset21BackwardDistance() { + return 1 * MB; + } + + size_t MaxOffset21ForwardDistance() { + // TODO(riscv64): Support "C" Standard Extension, max forward distance 1MiB - 2. 
+ return 1 * MB - 4; + } + + template <typename PrintBcond> + void TestBeqA0A1Forward(const std::string& test_name, + size_t nops_size, + const std::string& target_label, + PrintBcond&& print_bcond) { + std::string expected; + Riscv64Label label; + __ Beq(A0, A1, &label); + expected += print_bcond("eq", "ne", " a0, a1", target_label + "f"); + expected += EmitNops(nops_size); + __ Bind(&label); + expected += target_label + ":\n"; + DriverStr(expected, test_name); + } + + template <typename PrintBcond> + void TestBeqA0A1Backward(const std::string& test_name, + size_t nops_size, + const std::string& target_label, + PrintBcond&& print_bcond) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += target_label + ":\n"; + expected += EmitNops(nops_size); + __ Beq(A0, A1, &label); + expected += print_bcond("eq", "ne", " a0, a1", target_label + "b"); + DriverStr(expected, test_name); + } + + // Test a branch setup where expanding one branch causes expanding another branch + // which causes expanding another branch, etc. The argument `cascade` determines + // whether we push the first branch to expand, or not. + template <typename PrintBcond> + void TestBeqA0A1MaybeCascade(const std::string& test_name, + bool cascade, + PrintBcond&& print_bcond) { + const size_t kNumBeqs = MaxOffset13ForwardDistance() / sizeof(uint32_t) / 2u; + auto label_name = [](size_t i) { return ".L" + std::to_string(i); }; + + std::string expected; + std::vector<Riscv64Label> labels(kNumBeqs); + for (size_t i = 0; i != kNumBeqs; ++i) { + __ Beq(A0, A1, &labels[i]); + expected += print_bcond("eq", "ne", " a0, a1", label_name(i)); + } + if (cascade) { + expected += EmitNops(sizeof(uint32_t)); + } + for (size_t i = 0; i != kNumBeqs; ++i) { + expected += EmitNops(2 * sizeof(uint32_t)); + __ Bind(&labels[i]); + expected += label_name(i) + ":\n"; + } + DriverStr(expected, test_name); + } + + auto GetPrintJalRd() { + return [=](XRegister rd, const std::string& target) { + std::string rd_name = GetRegisterName(rd); + return "jal " + rd_name + ", " + target + "\n"; + }; + } + + auto GetPrintCallRd(const std::string& base_label) { + return [=](XRegister rd, const std::string& target) { + std::string rd_name = GetRegisterName(rd); + std::string temp_name = (rd != Zero) ? 
+ return base_label + ":\n" +
+ "auipc " + temp_name + ", %pcrel_hi(" + target + ")\n" +
+ "jalr " + rd_name + ", %pcrel_lo(" + base_label + "b)(" + temp_name + ")\n";
+ };
+ }
+
+ template <typename PrintJalRd>
+ void TestJalRdForward(const std::string& test_name,
+ size_t gap_size,
+ const std::string& label_name,
+ PrintJalRd&& print_jalrd) {
+ std::string expected;
+ Riscv64Label label;
+ for (XRegister* reg : GetRegisters()) {
+ __ Jal(*reg, &label);
+ expected += print_jalrd(*reg, label_name + "f");
+ }
+ expected += EmitNops(gap_size);
+ __ Bind(&label);
+ expected += label_name + ":\n";
+ DriverStr(expected, test_name);
+ }
+
+ template <typename PrintJalRd>
+ void TestJalRdBackward(const std::string& test_name,
+ size_t gap_size,
+ const std::string& label_name,
+ PrintJalRd&& print_jalrd) {
+ std::string expected;
+ Riscv64Label label;
+ __ Bind(&label);
+ expected += label_name + ":\n";
+ expected += EmitNops(gap_size);
+ for (XRegister* reg : GetRegisters()) {
+ __ Jal(*reg, &label);
+ expected += print_jalrd(*reg, label_name + "b");
+ }
+ DriverStr(expected, test_name);
+ }
+
+ auto GetEmitJ() {
+ return [=](Riscv64Label* label) { __ J(label); };
+ }
+
+ auto GetEmitJal() {
+ return [=](Riscv64Label* label) { __ Jal(label); };
+ }
+
+ auto GetPrintJ() {
+ return [=](const std::string& target) {
+ return "j " + target + "\n";
+ };
+ }
+
+ auto GetPrintJal() {
+ return [=](const std::string& target) {
+ return "jal " + target + "\n";
+ };
+ }
+
+ auto GetPrintTail(const std::string& base_label) {
+ return [=](const std::string& target) {
+ return base_label + ":\n" +
+ "auipc t6, %pcrel_hi(" + target + ")\n" +
+ "jalr x0, %pcrel_lo(" + base_label + "b)(t6)\n";
+ };
+ }
+
+ auto GetPrintCall(const std::string& base_label) {
+ return [=](const std::string& target) {
+ return base_label + ":\n" +
+ "auipc ra, %pcrel_hi(" + target + ")\n" +
+ "jalr ra, %pcrel_lo(" + base_label + "b)(ra)\n";
+ };
+ }
+
+ template <typename EmitBuncond, typename PrintBuncond>
+ void TestBuncondForward(const std::string& test_name,
+ size_t gap_size,
+ const std::string& label_name,
+ EmitBuncond&& emit_buncond,
+ PrintBuncond&& print_buncond) {
+ std::string expected;
+ Riscv64Label label;
+ emit_buncond(&label);
+ expected += print_buncond(label_name + "f");
+ expected += EmitNops(gap_size);
+ __ Bind(&label);
+ expected += label_name + ":\n";
+ DriverStr(expected, test_name);
+ }
+
+ template <typename EmitBuncond, typename PrintBuncond>
+ void TestBuncondBackward(const std::string& test_name,
+ size_t gap_size,
+ const std::string& label_name,
+ EmitBuncond&& emit_buncond,
+ PrintBuncond&& print_buncond) {
+ std::string expected;
+ Riscv64Label label;
+ __ Bind(&label);
+ expected += label_name + ":\n";
+ expected += EmitNops(gap_size);
+ emit_buncond(&label);
+ expected += print_buncond(label_name + "b");
+ DriverStr(expected, test_name);
+ }
+
+ void TestLoadLiteral(const std::string& test_name, bool with_padding_for_long) {
+ std::string expected;
+ Literal* narrow_literal = __ NewLiteral<uint32_t>(0x12345678);
+ Literal* wide_literal = __ NewLiteral<uint64_t>(0x1234567887654321);
+ auto print_load = [&](const std::string& load, XRegister rd, const std::string& label) {
+ std::string rd_name = GetRegisterName(rd);
+ expected += "1:\n"
+ "auipc " + rd_name + ", %pcrel_hi(" + label + "f)\n" +
+ load + " " + rd_name + ", %pcrel_lo(1b)(" + rd_name + ")\n";
+ };
+ for (XRegister* reg : GetRegisters()) {
+ if (*reg != Zero) {
+ __ Lw(*reg, narrow_literal);
+ print_load("lw", *reg, "2");
+ __ Lwu(*reg, narrow_literal);
+ print_load("lwu", *reg, "2");
+ __ Ld(*reg, wide_literal);
+ print_load("ld", *reg, "3");
+ }
+ }
+ // Each literal load above emits 8 bytes of code while the narrow literal itself
+ // occupies only 4 bytes of data. Unless we emit one more 4-byte instruction, the
+ // 8-byte literal needs alignment padding after the narrow literal.
+ expected += EmitNops(with_padding_for_long ? 0u : sizeof(uint32_t));
+ expected += "2:\n"
+ ".4byte 0x12345678\n" +
+ std::string(with_padding_for_long ? ".4byte 0\n" : "") +
+ "3:\n"
+ ".8byte 0x1234567887654321\n";
+ DriverStr(expected, test_name);
+ }
+
 private:
 std::vector<riscv64::XRegister*> registers_;
 std::map<riscv64::XRegister, std::string, RISCV64CpuRegisterCompare> secondary_register_names_;
@@ -414,15 +767,15 @@ TEST_F(AssemblerRISCV64Test, Mul) {
 }
 
 TEST_F(AssemblerRISCV64Test, Mulh) {
- DriverStr(RepeatRRR(&riscv64::Riscv64Assembler::Mulh, "mul {reg1}, {reg2}, {reg3}"), "Mulh");
+ DriverStr(RepeatRRR(&riscv64::Riscv64Assembler::Mulh, "mulh {reg1}, {reg2}, {reg3}"), "Mulh");
 }
 
 TEST_F(AssemblerRISCV64Test, Mulhsu) {
- DriverStr(RepeatRRR(&riscv64::Riscv64Assembler::Mulhsu, "mul {reg1}, {reg2}, {reg3}"), "Mulhsu");
+ DriverStr(RepeatRRR(&riscv64::Riscv64Assembler::Mulhsu, "mulhsu {reg1}, {reg2}, {reg3}"), "Mulhsu");
 }
 
 TEST_F(AssemblerRISCV64Test, Mulhu) {
- DriverStr(RepeatRRR(&riscv64::Riscv64Assembler::Mulhu, "mul {reg1}, {reg2}, {reg3}"), "Mulhu");
+ DriverStr(RepeatRRR(&riscv64::Riscv64Assembler::Mulhu, "mulhu {reg1}, {reg2}, {reg3}"), "Mulhu");
 }
 
 TEST_F(AssemblerRISCV64Test, Div) {
@@ -434,11 +787,11 @@ TEST_F(AssemblerRISCV64Test, Divu) {
 }
 
 TEST_F(AssemblerRISCV64Test, Rem) {
- DriverStr(RepeatRRR(&riscv64::Riscv64Assembler::Rem, "remw {reg1}, {reg2}, {reg3}"), "Rem");
+ DriverStr(RepeatRRR(&riscv64::Riscv64Assembler::Rem, "rem {reg1}, {reg2}, {reg3}"), "Rem");
 }
 
 TEST_F(AssemblerRISCV64Test, Remu) {
- DriverStr(RepeatRRR(&riscv64::Riscv64Assembler::Remu, "remw {reg1}, {reg2}, {reg3}"), "Remu");
+ DriverStr(RepeatRRR(&riscv64::Riscv64Assembler::Remu, "remu {reg1}, {reg2}, {reg3}"), "Remu");
 }
 
 TEST_F(AssemblerRISCV64Test, Mulw) {
@@ -450,7 +803,7 @@ TEST_F(AssemblerRISCV64Test, Divw) {
 }
 
 TEST_F(AssemblerRISCV64Test, Divuw) {
- DriverStr(RepeatRRR(&riscv64::Riscv64Assembler::Divuw, "div {reg1}, {reg2}, {reg3}"), "Divuw");
+ DriverStr(RepeatRRR(&riscv64::Riscv64Assembler::Divuw, "divuw {reg1}, {reg2}, {reg3}"), "Divuw");
 }
 
 TEST_F(AssemblerRISCV64Test, Remw) {
@@ -908,10 +1261,308 @@ TEST_F(AssemblerRISCV64Test, Jalr0) {
 }
 
 TEST_F(AssemblerRISCV64Test, Ret) {
- GetAssembler()->Ret();
+ __ Ret();
 DriverStr("ret\n", "Ret");
 }
 
+TEST_F(AssemblerRISCV64Test, BcondForward3KiB) {
+ TestBcondForward("BcondForward3KiB", 3 * KB, "1", GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BcondBackward3KiB) {
+ TestBcondBackward("BcondBackward3KiB", 3 * KB, "1", GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BcondForward5KiB) {
+ TestBcondForward("BcondForward5KiB", 5 * KB, "1", GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BcondBackward5KiB) {
+ TestBcondBackward("BcondBackward5KiB", 5 * KB, "1", GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BcondForward2MiB) {
+ TestBcondForward("BcondForward2MiB", 2 * MB, "1", GetPrintBcondOppositeAndTail("2", "3"));
+}
+
+TEST_F(AssemblerRISCV64Test, BcondBackward2MiB) {
+ TestBcondBackward("BcondBackward2MiB", 2 * MB, "1", GetPrintBcondOppositeAndTail("2", "3"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset13Forward) {
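+ // The branch offset covers the 4-byte BEQ itself plus the nop gap, so this gap
+ // places the target exactly at the edge of the 13-bit range; the branch must
+ // still be emitted in its short form.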
+ TestBeqA0A1Forward("BeqA0A1MaxOffset13Forward",
+ MaxOffset13ForwardDistance() - /*BEQ*/ 4u,
+ "1",
+ GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset13Backward) {
+ TestBeqA0A1Backward("BeqA0A1MaxOffset13Backward",
+ MaxOffset13BackwardDistance(),
+ "1",
+ GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset13Forward) {
+ TestBeqA0A1Forward("BeqA0A1OverMaxOffset13Forward",
+ MaxOffset13ForwardDistance() - /*BEQ*/ 4u + /*Exceed max*/ 4u,
+ "1",
+ GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset13Backward) {
+ TestBeqA0A1Backward("BeqA0A1OverMaxOffset13Backward",
+ MaxOffset13BackwardDistance() + /*Exceed max*/ 4u,
+ "1",
+ GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset21Forward) {
+ TestBeqA0A1Forward("BeqA0A1MaxOffset21Forward",
+ MaxOffset21ForwardDistance() - /*J*/ 4u,
+ "1",
+ GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset21Backward) {
+ TestBeqA0A1Backward("BeqA0A1MaxOffset21Backward",
+ MaxOffset21BackwardDistance() - /*BNE*/ 4u,
+ "1",
+ GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset21Forward) {
+ TestBeqA0A1Forward("BeqA0A1OverMaxOffset21Forward",
+ MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u,
+ "1",
+ GetPrintBcondOppositeAndTail("2", "3"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset21Backward) {
+ TestBeqA0A1Backward("BeqA0A1OverMaxOffset21Backward",
+ MaxOffset21BackwardDistance() - /*BNE*/ 4u + /*Exceed max*/ 4u,
+ "1",
+ GetPrintBcondOppositeAndTail("2", "3"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1AlmostCascade) {
+ TestBeqA0A1MaybeCascade("BeqA0A1AlmostCascade", /*cascade=*/ false, GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1Cascade) {
+ TestBeqA0A1MaybeCascade(
+ "BeqA0A1Cascade", /*cascade=*/ true, GetPrintBcondOppositeAndJ("1"));
+}
+
+TEST_F(AssemblerRISCV64Test, BcondElimination) {
+ Riscv64Label label;
+ __ Bind(&label);
+ __ Nop();
+ for (XRegister* reg : GetRegisters()) {
+ __ Bne(*reg, *reg, &label);
+ __ Blt(*reg, *reg, &label);
+ __ Bgt(*reg, *reg, &label);
+ __ Bltu(*reg, *reg, &label);
+ __ Bgtu(*reg, *reg, &label);
+ }
+ DriverStr("nop\n", "BcondElimination");
+}
+
+TEST_F(AssemblerRISCV64Test, BcondUnconditional) {
+ Riscv64Label label;
+ __ Bind(&label);
+ __ Nop();
+ for (XRegister* reg : GetRegisters()) {
+ __ Beq(*reg, *reg, &label);
+ __ Bge(*reg, *reg, &label);
+ __ Ble(*reg, *reg, &label);
+ __ Bleu(*reg, *reg, &label);
+ __ Bgeu(*reg, *reg, &label);
+ }
+ std::string expected =
+ "1:\n"
+ "nop\n" +
+ RepeatInsn(5u * GetRegisters().size(), "j 1b\n", []() {});
+ DriverStr(expected, "BcondUnconditional");
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdForward3KiB) {
+ TestJalRdForward("JalRdForward3KiB", 3 * KB, "1", GetPrintJalRd());
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdBackward3KiB) {
+ TestJalRdBackward("JalRdBackward3KiB", 3 * KB, "1", GetPrintJalRd());
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdForward2MiB) {
+ TestJalRdForward("JalRdForward2MiB", 2 * MB, "1", GetPrintCallRd("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdBackward2MiB) {
+ TestJalRdBackward("JalRdBackward2MiB", 2 * MB, "1", GetPrintCallRd("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JForward3KiB) {
+ TestBuncondForward("JForward3KiB", 3 * KB, "1", GetEmitJ(), GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JBackward3KiB) {
+ TestBuncondBackward("JBackward3KiB", 3 * KB, "1", GetEmitJ(), GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JForward2MiB) {
+ TestBuncondForward("JForward2MiB", 2 * MB, "1", GetEmitJ(), GetPrintTail("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JBackward2MiB) {
+ TestBuncondBackward("JBackward2MiB", 2 * MB, "1", GetEmitJ(), GetPrintTail("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JMaxOffset21Forward) {
+ TestBuncondForward("JMaxOffset21Forward",
+ MaxOffset21ForwardDistance() - /*J*/ 4u,
+ "1",
+ GetEmitJ(),
+ GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JMaxOffset21Backward) {
+ TestBuncondBackward("JMaxOffset21Backward",
+ MaxOffset21BackwardDistance(),
+ "1",
+ GetEmitJ(),
+ GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JOverMaxOffset21Forward) {
+ TestBuncondForward("JOverMaxOffset21Forward",
+ MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u,
+ "1",
+ GetEmitJ(),
+ GetPrintTail("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JOverMaxOffset21Backward) {
+ TestBuncondBackward("JOverMaxOffset21Backward",
+ MaxOffset21BackwardDistance() + /*Exceed max*/ 4u,
+ "1",
+ GetEmitJ(),
+ GetPrintTail("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, CallForward3KiB) {
+ TestBuncondForward("CallForward3KiB", 3 * KB, "1", GetEmitJal(), GetPrintJal());
+}
+
+TEST_F(AssemblerRISCV64Test, CallBackward3KiB) {
+ TestBuncondBackward("CallBackward3KiB", 3 * KB, "1", GetEmitJal(), GetPrintJal());
+}
+
+TEST_F(AssemblerRISCV64Test, CallForward2MiB) {
+ TestBuncondForward("CallForward2MiB", 2 * MB, "1", GetEmitJal(), GetPrintCall("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, CallBackward2MiB) {
+ TestBuncondBackward("CallBackward2MiB", 2 * MB, "1", GetEmitJal(), GetPrintCall("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, CallMaxOffset21Forward) {
+ TestBuncondForward("CallMaxOffset21Forward",
+ MaxOffset21ForwardDistance() - /*J*/ 4u,
+ "1",
+ GetEmitJal(),
+ GetPrintJal());
+}
+
+TEST_F(AssemblerRISCV64Test, CallMaxOffset21Backward) {
+ TestBuncondBackward("CallMaxOffset21Backward",
+ MaxOffset21BackwardDistance(),
+ "1",
+ GetEmitJal(),
+ GetPrintJal());
+}
+
+TEST_F(AssemblerRISCV64Test, CallOverMaxOffset21Forward) {
+ TestBuncondForward("CallOverMaxOffset21Forward",
+ MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u,
+ "1",
+ GetEmitJal(),
+ GetPrintCall("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, CallOverMaxOffset21Backward) {
+ TestBuncondBackward("CallOverMaxOffset21Backward",
+ MaxOffset21BackwardDistance() + /*Exceed max*/ 4u,
+ "1",
+ GetEmitJal(),
+ GetPrintCall("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, LoadLabelAddress) {
+ std::string expected;
+ constexpr size_t kNumLoadsForward = 4 * KB;
+ constexpr size_t kNumLoadsBackward = 4 * KB;
+ Riscv64Label label;
+ auto emit_batch = [&](size_t num_loads, const std::string& target_label) {
+ for (size_t i = 0; i != num_loads; ++i) {
+ // Cycle through non-Zero registers.
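+ // `i % (kNumberOfXRegisters - 1)` picks one of the 31 registers x1..x31;
+ // Zero is excluded because writes to x0 are discarded.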
+ XRegister rd = enum_cast<XRegister>((i % (kNumberOfXRegisters - 1)) + 1);
+ DCHECK_NE(rd, Zero);
+ std::string rd_name = GetRegisterName(rd);
+ __ LoadLabelAddress(rd, &label);
+ expected += "1:\n"
+ "auipc " + rd_name + ", %pcrel_hi(" + target_label + ")\n"
+ "addi " + rd_name + ", " + rd_name + ", %pcrel_lo(1b)\n";
+ }
+ };
+ emit_batch(kNumLoadsForward, "2f");
+ __ Bind(&label);
+ expected += "2:\n";
+ emit_batch(kNumLoadsBackward, "2b");
+ DriverStr(expected, "LoadLabelAddress");
+}
+
+TEST_F(AssemblerRISCV64Test, LoadLiteralWithPaddingForLong) {
+ TestLoadLiteral("LoadLiteralWithPaddingForLong", /*with_padding_for_long=*/ true);
+}
+
+TEST_F(AssemblerRISCV64Test, LoadLiteralWithoutPaddingForLong) {
+ TestLoadLiteral("LoadLiteralWithoutPaddingForLong", /*with_padding_for_long=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, JumpTable) {
+ std::string expected;
+ expected += EmitNops(sizeof(uint32_t));
+ Riscv64Label targets[4];
+ uint32_t target_locations[4];
+ JumpTable* jump_table = __ CreateJumpTable(ArenaVector<Riscv64Label*>(
+ {&targets[0], &targets[1], &targets[2], &targets[3]}, __ GetAllocator()->Adapter()));
+ for (size_t i : {0, 1, 2, 3}) {
+ target_locations[i] = __ CodeSize();
+ __ Bind(&targets[i]);
+ expected += std::to_string(i) + ":\n";
+ expected += EmitNops(sizeof(uint32_t));
+ }
+ __ LoadLabelAddress(A0, jump_table->GetLabel());
+ expected += "4:\n"
+ "auipc a0, %pcrel_hi(5f)\n"
+ "addi a0, a0, %pcrel_lo(4b)\n";
+ expected += EmitNops(sizeof(uint32_t));
+ uint32_t label5_location = __ CodeSize();
+ auto target_offset = [&](size_t i) {
+ // Even with `-mno-relax`, the clang assembler does not fully resolve `.4byte 0b - 5b`
+ // and emits a relocation, so we need to calculate target offsets ourselves.
+ return std::to_string(static_cast<int32_t>(target_locations[i] - label5_location));
+ };
+ expected += "5:\n"
+ ".4byte " + target_offset(0) + "\n"
+ ".4byte " + target_offset(1) + "\n"
+ ".4byte " + target_offset(2) + "\n"
+ ".4byte " + target_offset(3) + "\n";
+ DriverStr(expected, "JumpTable");
+}
+
 #undef __
 
+} // namespace riscv64
 } // namespace art
diff --git a/runtime/arch/riscv64/registers_riscv64.h b/runtime/arch/riscv64/registers_riscv64.h
index bd0991e879..f05c65ab2e 100644
--- a/runtime/arch/riscv64/registers_riscv64.h
+++ b/runtime/arch/riscv64/registers_riscv64.h
@@ -67,7 +67,8 @@ enum XRegister {
 kNoXRegister = -1, // Signals an illegal X register.
 
 // Aliases.
- TR = S1, // ART Thread Register - managed runtime
+ TR = S1, // ART Thread Register - managed runtime
+ TMP = T6, // Reserved for special uses, such as assembler macro instructions.
 };
 
 std::ostream& operator<<(std::ostream& os, const XRegister& rhs);
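For readers without the earlier hunks of assembler_riscv64_test.cc: the `GetPrintBcond*` helpers used by the branch tests above correspond to the three shapes a conditional branch can take after relaxation. The stand-alone sketch below illustrates those shapes only; the function names and signatures are illustrative assumptions, not the file's actual helpers.

#include <string>

// Sketch only: the three expected-output forms checked by the Bcond tests.

// Short form, reach +-4KiB (13-bit signed B-type offset):
std::string PrintShortBcond(const std::string& cond,
                            const std::string& args,
                            const std::string& target) {
  return "b" + cond + args + ", " + target + "\n";
}

// Medium form, reach +-1MiB: the opposite condition skips an unconditional jump.
std::string PrintBcondOppositeAndJ(const std::string& opposite,
                                   const std::string& args,
                                   const std::string& target,
                                   const std::string& skip) {
  return "b" + opposite + args + ", " + skip + "f\n"
         "j " + target + "\n" +
         skip + ":\n";
}

// Long form, full 32-bit PC-relative reach: the opposite condition skips an
// AUIPC+JALR pair that clobbers the reserved TMP register (t6).
std::string PrintBcondOppositeAndTail(const std::string& opposite,
                                      const std::string& args,
                                      const std::string& target,
                                      const std::string& skip,
                                      const std::string& base) {
  return "b" + opposite + args + ", " + skip + "f\n" +
         base + ":\n"
         "auipc t6, %pcrel_hi(" + target + ")\n"
         "jalr x0, %pcrel_lo(" + base + "b)(t6)\n" +
         skip + ":\n";
}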