Diffstat (limited to 'compiler/utils')
34 files changed, 9088 insertions, 577 deletions
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc index c7ca003530..d64de09501 100644 --- a/compiler/utils/arm/assembler_arm_vixl.cc +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -52,7 +52,7 @@ const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const { return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>(); } -void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) { +void ArmVIXLAssembler::CopyInstructions(const MemoryRegion& region) { // Copy the instructions from the buffer. MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize()); region.CopyFrom(0, from); diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index 741119d7f7..50dc06fefc 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -173,6 +173,30 @@ class ArmVIXLMacroAssembler final : public vixl32::MacroAssembler { } } using MacroAssembler::Vmov; + + // TODO(b/281982421): Move the implementation of Mrrc to vixl and remove this implementation. + void Mrrc(vixl32::Register r1, vixl32::Register r2, int coproc, int opc1, int crm) { + // See ARM A-profile A32/T32 Instruction set architecture + // https://developer.arm.com/documentation/ddi0597/2022-09/Base-Instructions/MRRC--Move-to-two-general-purpose-registers-from-System-register- + CHECK(coproc == 15 || coproc == 14); + if (IsUsingT32()) { + uint32_t inst = (0b111011000101 << 20) | + (r2.GetCode() << 16) | + (r1.GetCode() << 12) | + (coproc << 8) | + (opc1 << 4) | + crm; + EmitT32_32(inst); + } else { + uint32_t inst = (0b000011000101 << 20) | + (r2.GetCode() << 16) | + (r1.GetCode() << 12) | + (coproc << 8) | + (opc1 << 4) | + crm; + EmitA32(inst); + } + } }; class ArmVIXLAssembler final : public Assembler { @@ -194,12 +218,12 @@ class ArmVIXLAssembler final : public Assembler { const uint8_t* CodeBufferBaseAddress() const override; // Copy instructions out of assembly buffer into the given region of memory. - void FinalizeInstructions(const MemoryRegion& region) override; + void CopyInstructions(const MemoryRegion& region) override; - void Bind(Label* label ATTRIBUTE_UNUSED) override { + void Bind([[maybe_unused]] Label* label) override { UNIMPLEMENTED(FATAL) << "Do not use Bind(Label*) for ARM"; } - void Jump(Label* label ATTRIBUTE_UNUSED) override { + void Jump([[maybe_unused]] Label* label) override { UNIMPLEMENTED(FATAL) << "Do not use Jump(Label*) for ARM"; } diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc index 54873454eb..9c2589138c 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc @@ -155,7 +155,7 @@ void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size, // Pop LR to PC unless we need to emit some read barrier code just before returning. bool emit_code_before_return = - (gUseReadBarrier && kUseBakerReadBarrier) && + kReserveMarkingRegister && (may_suspend || (kIsDebugBuild && emit_run_time_checks_in_debug_mode_)); if ((core_spill_mask & (1u << lr.GetCode())) != 0u && !emit_code_before_return) { DCHECK_EQ(core_spill_mask & (1u << pc.GetCode()), 0u); @@ -197,18 +197,7 @@ void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size, // Pop core callee saves. 
if (core_spill_mask != 0u) { - if (IsPowerOfTwo(core_spill_mask) && - core_spill_mask != (1u << pc.GetCode()) && - WhichPowerOf2(core_spill_mask) >= 8) { - // FIXME(vixl): vixl fails to transform a pop with a single high register - // to a post-index LDR (also known as POP encoding T3) and emits the LDMIA - // (also known as POP encoding T2) which is UNPREDICTABLE for 1 register. - // So we have to explicitly do the transformation here. Bug: 178048807 - vixl32::Register reg(WhichPowerOf2(core_spill_mask)); - ___ Ldr(reg, MemOperand(sp, kFramePointerSize, PostIndex)); - } else { - ___ Pop(RegisterList(core_spill_mask)); - } + ___ Pop(RegisterList(core_spill_mask)); if ((core_spill_mask & (1u << pc.GetCode())) == 0u) { cfi().AdjustCFAOffset(-kFramePointerSize * POPCOUNT(core_spill_mask)); cfi().RestoreMany(DWARFReg(r0), core_spill_mask); @@ -344,13 +333,13 @@ void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs } } -void ArmVIXLJNIMacroAssembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED) { +void ArmVIXLJNIMacroAssembler::SignExtend([[maybe_unused]] ManagedRegister mreg, + [[maybe_unused]] size_t size) { UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm"; } -void ArmVIXLJNIMacroAssembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED) { +void ArmVIXLJNIMacroAssembler::ZeroExtend([[maybe_unused]] ManagedRegister mreg, + [[maybe_unused]] size_t size) { UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm"; } @@ -720,7 +709,7 @@ void ArmVIXLJNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst, ManagedRegister msrc, - size_t size ATTRIBUTE_UNUSED) { + [[maybe_unused]] size_t size) { ArmManagedRegister dst = mdst.AsArm(); if (kIsDebugBuild) { // Check that the destination is not a scratch register. @@ -861,13 +850,13 @@ void ArmVIXLJNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister ___ Ldr(reg, MemOperand(reg)); } -void ArmVIXLJNIMacroAssembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED, - bool could_be_null ATTRIBUTE_UNUSED) { +void ArmVIXLJNIMacroAssembler::VerifyObject([[maybe_unused]] ManagedRegister src, + [[maybe_unused]] bool could_be_null) { // TODO: not validating references. } -void ArmVIXLJNIMacroAssembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED, - bool could_be_null ATTRIBUTE_UNUSED) { +void ArmVIXLJNIMacroAssembler::VerifyObject([[maybe_unused]] FrameOffset src, + [[maybe_unused]] bool could_be_null) { // TODO: not validating references. } @@ -1026,7 +1015,6 @@ void ArmVIXLJNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnary UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); vixl32::Register test_reg; DCHECK_EQ(Thread::IsGcMarkingSize(), 4u); - DCHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // TestGcMarking() is used in the JNI stub entry when the marking register is up to date.
if (kIsDebugBuild && emit_run_time_checks_in_debug_mode_) { diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 26dce7c502..13acc7c852 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -79,7 +79,7 @@ const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const { return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>(); } -void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) { +void Arm64Assembler::CopyInstructions(const MemoryRegion& region) { // Copy the instructions from the buffer. MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize()); region.CopyFrom(0, from); diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index f8168903bd..ad6a8edadf 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -91,7 +91,7 @@ class Arm64Assembler final : public Assembler { const uint8_t* CodeBufferBaseAddress() const override; // Copy instructions out of assembly buffer into the given region of memory. - void FinalizeInstructions(const MemoryRegion& region) override; + void CopyInstructions(const MemoryRegion& region) override; void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs); @@ -145,10 +145,10 @@ class Arm64Assembler final : public Assembler { // MaybeGenerateMarkingRegisterCheck and is passed to the BRK instruction. void GenerateMarkingRegisterCheck(vixl::aarch64::Register temp, int code = 0); - void Bind(Label* label ATTRIBUTE_UNUSED) override { + void Bind([[maybe_unused]] Label* label) override { UNIMPLEMENTED(FATAL) << "Do not use Bind(Label*) for ARM64"; } - void Jump(Label* label ATTRIBUTE_UNUSED) override { + void Jump([[maybe_unused]] Label* label) override { UNIMPLEMENTED(FATAL) << "Do not use Jump(Label*) for ARM64"; } diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc index 9e9f122cf6..8ce44b6c63 100644 --- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc @@ -705,7 +705,7 @@ void Arm64JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister m } void Arm64JNIMacroAssembler::TryToTransitionFromRunnableToNative( - JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED) { + JNIMacroLabel* label, [[maybe_unused]] ArrayRef<const ManagedRegister> scratch_regs) { constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kArm64PointerSize>(); @@ -734,8 +734,8 @@ void Arm64JNIMacroAssembler::TryToTransitionFromRunnableToNative( void Arm64JNIMacroAssembler::TryToTransitionFromNativeToRunnable( JNIMacroLabel* label, - ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED, - ManagedRegister return_reg ATTRIBUTE_UNUSED) { + [[maybe_unused]] ArrayRef<const ManagedRegister> scratch_regs, + [[maybe_unused]] ManagedRegister return_reg) { constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kArm64PointerSize>(); @@ -811,7 +811,6 @@ void 
Arm64JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCo UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); Register test_reg; DCHECK_EQ(Thread::IsGcMarkingSize(), 4u); - DCHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // TestGcMarking() is used in the JNI stub entry when the marking register is up to date. if (kIsDebugBuild && emit_run_time_checks_in_debug_mode_) { diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index b82f0dc4b4..1c04a3d20b 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -57,18 +57,21 @@ void AssemblerBuffer::ProcessFixups(const MemoryRegion& region) { fixup->Process(region, fixup->position()); fixup = fixup->previous(); } +#ifndef NDEBUG + fixups_processed_ = true; +#endif +} + + +void AssemblerBuffer::ProcessFixups() { + MemoryRegion from(reinterpret_cast<void*>(contents()), Size()); + ProcessFixups(from); } -void AssemblerBuffer::FinalizeInstructions(const MemoryRegion& instructions) { - // Copy the instructions from the buffer. +void AssemblerBuffer::CopyInstructions(const MemoryRegion& instructions) { MemoryRegion from(reinterpret_cast<void*>(contents()), Size()); instructions.CopyFrom(0, from); - // Process fixups in the instructions. - ProcessFixups(instructions); -#ifndef NDEBUG - fixups_processed_ = true; -#endif } diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 13a5d9fd01..f3fa711dbb 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -163,9 +163,8 @@ class AssemblerBuffer { uint8_t* contents() const { return contents_; } - // Copy the assembled instructions into the specified memory block - // and apply all fixups. - void FinalizeInstructions(const MemoryRegion& region); + // Copy the assembled instructions into the specified memory block. + void CopyInstructions(const MemoryRegion& region); // To emit an instruction to the assembler buffer, the EnsureCapacity helper // must be used to guarantee that the underlying data area is big enough to @@ -246,6 +245,8 @@ class AssemblerBuffer { // The provided `min_capacity` must be higher than current `Capacity()`. void ExtendCapacity(size_t min_capacity); + void ProcessFixups(); + private: // The limit is set to kMinimumGap bytes before the end of the data area. // This leaves enough space for the longest possible instruction and allows @@ -357,7 +358,10 @@ class DebugFrameOpCodeWriterForAssembler final class Assembler : public DeletableArenaObject<kArenaAllocAssembler> { public: // Finalize the code; emit slow paths, fixup branches, add literal pool, etc. - virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); } + virtual void FinalizeCode() { + buffer_.EmitSlowPaths(this); + buffer_.ProcessFixups(); + } // Size of generated code virtual size_t CodeSize() const { return buffer_.Size(); } @@ -375,12 +379,12 @@ class Assembler : public DeletableArenaObject<kArenaAllocAssembler> { virtual size_t CodePosition() { return CodeSize(); } // Copy instructions out of assembly buffer into the given region of memory - virtual void FinalizeInstructions(const MemoryRegion& region) { - buffer_.FinalizeInstructions(region); + virtual void CopyInstructions(const MemoryRegion& region) { + buffer_.CopyInstructions(region); } // TODO: Implement with disassembler. - virtual void Comment(const char* format ATTRIBUTE_UNUSED, ...) {} + virtual void Comment([[maybe_unused]] const char* format, ...) 
{} virtual void Bind(Label* label) = 0; virtual void Jump(Label* label) = 0; diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index d03e5a7abc..72f48367a6 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -26,6 +26,7 @@ #include <fstream> #include <iterator> +#include "base/array_ref.h" #include "base/macros.h" #include "base/malloc_arena_pool.h" #include "assembler_test_base.h" @@ -200,8 +201,8 @@ class AssemblerTest : public AssemblerTestBase { template <typename Reg1, typename Reg2, typename ImmType> std::string RepeatTemplatedRegistersImmBits(void (Ass::*f)(Reg1, Reg2, ImmType), int imm_bits, - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), const std::string& fmt, @@ -215,48 +216,28 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2, new_imm * multiplier + bias); + (assembler_.get()->*f)(reg1, reg2, new_imm * multiplier + bias); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceImm(imm, bias, multiplier, &base); - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm * multiplier + bias; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } - - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename Reg1, typename Reg2, typename Reg3, typename ImmType> std::string RepeatTemplatedRegistersImmBits(void (Ass::*f)(Reg1, Reg2, Reg3, ImmType), int imm_bits, - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, - const std::vector<Reg3*> reg3_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + ArrayRef<const Reg3> reg3_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), std::string (AssemblerTest::*GetName3)(const Reg3&), @@ -271,53 +252,28 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2, *reg3, new_imm + bias); + (assembler_.get()->*f)(reg1, reg2, reg3, new_imm + bias); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } - - std::string reg3_string = (this->*GetName3)(*reg3); - size_t reg3_index; - while ((reg3_index = base.find(REG3_TOKEN)) != std::string::npos) { - base.replace(reg3_index, ConstexprStrLen(REG3_TOKEN), reg3_string); - } - - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm + bias; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceReg(REG3_TOKEN, (this->*GetName3)(reg3), &base); + ReplaceImm(imm, bias, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename ImmType, typename Reg1, typename Reg2> std::string RepeatTemplatedImmBitsRegisters(void (Ass::*f)(ImmType, Reg1, Reg2), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), int imm_bits, @@ -332,46 +288,26 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(new_imm, *reg1, *reg2); + (assembler_.get()->*f)(new_imm, reg1, reg2); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } - - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } } - // Add a newline at the end. - str += "\n"; return str; } template <typename RegType, typename ImmType> std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType), int imm_bits, - const std::vector<RegType*> registers, + ArrayRef<const RegType> registers, std::string (AssemblerTest::*GetName)(const RegType&), const std::string& fmt, int bias) { @@ -382,36 +318,148 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg, new_imm + bias); + (assembler_.get()->*f)(reg, new_imm + bias); } std::string base = fmt; - std::string reg_string = (this->*GetName)(*reg); - size_t reg_index; - while ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } + ReplaceReg(REG_TOKEN, (this->*GetName)(reg), &base); + ReplaceImm(imm, bias, /*multiplier=*/ 1, &base); - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm + bias; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + str += base; + str += "\n"; + } + } + return str; + } + + template <typename RegType, typename ImmType> + std::string RepeatTemplatedRegisterImmBitsShift( + void (Ass::*f)(RegType, ImmType), + int imm_bits, + int shift, + ArrayRef<const RegType> registers, + std::string (AssemblerTest::*GetName)(const RegType&), + const std::string& fmt, + int bias) { + std::string str; + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0), shift); + + for (auto reg : registers) { + for (int64_t imm : imms) { + ImmType new_imm = CreateImmediate(imm); + if (f != nullptr) { + (assembler_.get()->*f)(reg, new_imm + bias); } + std::string base = fmt; + + 
ReplaceReg(REG_TOKEN, (this->*GetName)(reg), &base); + ReplaceImm(imm, bias, /*multiplier=*/ 1, &base); + + str += base; + str += "\n"; + } + } + return str; + } + + template <typename ImmType> + std::string RepeatTemplatedImmBitsShift( + void (Ass::*f)(ImmType), int imm_bits, int shift, const std::string& fmt, int bias = 0) { + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0), shift); + + WarnOnCombinations(imms.size()); + + std::string str; - if (str.size() > 0) { + for (int64_t imm : imms) { + ImmType new_imm = CreateImmediate(imm); + if (f != nullptr) { + (assembler_.get()->*f)(new_imm + bias); + } + std::string base = fmt; + + ReplaceImm(imm, bias, /*multiplier=*/ 1, &base); + + str += base; + str += "\n"; + } + return str; + } + + template <typename Reg1, typename Reg2, typename ImmType> + std::string RepeatTemplatedRegistersImmBitsShift( + void (Ass::*f)(Reg1, Reg2, ImmType), + int imm_bits, + int shift, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), + const std::string& fmt, + int bias = 0, + int multiplier = 1) { + std::string str; + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0), shift); + + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (int64_t imm : imms) { + ImmType new_imm = CreateImmediate(imm); + if (f != nullptr) { + (assembler_.get()->*f)(reg1, reg2, new_imm * multiplier + bias); + } + std::string base = fmt; + + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceImm(imm, bias, multiplier, &base); + + str += base; str += "\n"; } - str += base; } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename ImmType> + std::string RepeatIbS( + void (Ass::*f)(ImmType), int imm_bits, int shift, const std::string& fmt, int bias = 0) { + return RepeatTemplatedImmBitsShift<ImmType>(f, imm_bits, shift, fmt, bias); + } + + template <typename ImmType> + std::string RepeatRIbS( + void (Ass::*f)(Reg, ImmType), int imm_bits, int shift, const std::string& fmt, int bias = 0) { + return RepeatTemplatedRegisterImmBitsShift<Reg, ImmType>( + f, + imm_bits, + shift, + GetRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt, + bias); + } + + template <typename ImmType> + std::string RepeatRRIbS(void (Ass::*f)(Reg, Reg, ImmType), + int imm_bits, + int shift, + const std::string& fmt, + int bias = 0) { + return RepeatTemplatedRegistersImmBitsShift<Reg, Reg, ImmType>( + f, + imm_bits, + shift, + GetRegisters(), + GetRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt, + bias); + } + + template <typename ImmType> std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), int imm_bits, const std::string& fmt, @@ -488,6 +536,19 @@ class AssemblerTest : public AssemblerTestBase { fmt); } + std::string RepeatFFFF(void (Ass::*f)(FPReg, FPReg, FPReg, FPReg), const std::string& fmt) { + return RepeatTemplatedRegisters<FPReg, FPReg, FPReg, FPReg>(f, + GetFPRegisters(), + GetFPRegisters(), + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + fmt); + } + std::string RepeatFFR(void (Ass::*f)(FPReg, FPReg, Reg), const std::string& fmt) { return RepeatTemplatedRegisters<FPReg, FPReg, Reg>( f, @@ -538,6 +599,32 @@ class AssemblerTest : public AssemblerTestBase { fmt); } + std::string RepeatRFF(void (Ass::*f)(Reg, FPReg, FPReg), const std::string& fmt) { + return RepeatTemplatedRegisters<Reg, FPReg, FPReg>( + f, + GetRegisters(), + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + fmt); + } + + template <typename ImmType> + std::string RepeatRFIb(void (Ass::*f)(Reg, FPReg, ImmType), + int imm_bits, + const std::string& fmt) { + return RepeatTemplatedRegistersImmBits<Reg, FPReg, ImmType>( + f, + imm_bits, + GetRegisters(), + GetFPRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetFPRegName, + fmt); + } + std::string RepeatFR(void (Ass::*f)(FPReg, Reg), const std::string& fmt) { return RepeatTemplatedRegisters<FPReg, Reg>(f, GetFPRegisters(), @@ -590,21 +677,11 @@ class AssemblerTest : public AssemblerTestBase { } std::string base = fmt; - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } - // Add a newline at the end. - str += "\n"; return str; } @@ -710,36 +787,36 @@ class AssemblerTest : public AssemblerTestBase { // Returns a vector of registers used by any of the repeat methods // involving an "R" (e.g. RepeatR). 
- virtual std::vector<Reg*> GetRegisters() = 0; + virtual ArrayRef<const Reg> GetRegisters() = 0; // Returns a vector of fp-registers used by any of the repeat methods // involving an "F" (e.g. RepeatFF). - virtual std::vector<FPReg*> GetFPRegisters() { + virtual ArrayRef<const FPReg> GetFPRegisters() { UNIMPLEMENTED(FATAL) << "Architecture does not support floating-point registers"; UNREACHABLE(); } // Returns a vector of dedicated simd-registers used by any of the repeat // methods involving an "V" (e.g. RepeatVV). - virtual std::vector<VecReg*> GetVectorRegisters() { + virtual ArrayRef<const VecReg> GetVectorRegisters() { UNIMPLEMENTED(FATAL) << "Architecture does not support vector registers"; UNREACHABLE(); } // Secondary register names are the secondary view on registers, e.g., 32b on 64b systems. - virtual std::string GetSecondaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + virtual std::string GetSecondaryRegisterName([[maybe_unused]] const Reg& reg) { UNIMPLEMENTED(FATAL) << "Architecture does not support secondary registers"; UNREACHABLE(); } // Tertiary register names are the tertiary view on registers, e.g., 16b on 64b systems. - virtual std::string GetTertiaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + virtual std::string GetTertiaryRegisterName([[maybe_unused]] const Reg& reg) { UNIMPLEMENTED(FATAL) << "Architecture does not support tertiary registers"; UNREACHABLE(); } // Quaternary register names are the quaternary view on registers, e.g., 8b on 64b systems. - virtual std::string GetQuaternaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + virtual std::string GetQuaternaryRegisterName([[maybe_unused]] const Reg& reg) { UNIMPLEMENTED(FATAL) << "Architecture does not support quaternary registers"; UNREACHABLE(); } @@ -818,7 +895,9 @@ class AssemblerTest : public AssemblerTestBase { const int kMaxBitsExhaustiveTest = 8; // Create a couple of immediate values up to the number of bits given. 
- virtual std::vector<int64_t> CreateImmediateValuesBits(const int imm_bits, bool as_uint = false) { + virtual std::vector<int64_t> CreateImmediateValuesBits(const int imm_bits, + bool as_uint = false, + int shift = 0) { CHECK_GT(imm_bits, 0); CHECK_LE(imm_bits, 64); std::vector<int64_t> res; @@ -826,11 +905,11 @@ class AssemblerTest : public AssemblerTestBase { if (imm_bits <= kMaxBitsExhaustiveTest) { if (as_uint) { for (uint64_t i = MinInt<uint64_t>(imm_bits); i <= MaxInt<uint64_t>(imm_bits); i++) { - res.push_back(static_cast<int64_t>(i)); + res.push_back(static_cast<int64_t>(i << shift)); } } else { for (int64_t i = MinInt<int64_t>(imm_bits); i <= MaxInt<int64_t>(imm_bits); i++) { - res.push_back(i); + res.push_back(i << shift); } } } else { @@ -838,14 +917,14 @@ class AssemblerTest : public AssemblerTestBase { for (uint64_t i = MinInt<uint64_t>(kMaxBitsExhaustiveTest); i <= MaxInt<uint64_t>(kMaxBitsExhaustiveTest); i++) { - res.push_back(static_cast<int64_t>(i)); + res.push_back(static_cast<int64_t>(i << shift)); } for (int i = 0; i <= imm_bits; i++) { uint64_t j = (MaxInt<uint64_t>(kMaxBitsExhaustiveTest) + 1) + ((MaxInt<uint64_t>(imm_bits) - (MaxInt<uint64_t>(kMaxBitsExhaustiveTest) + 1)) * i / imm_bits); - res.push_back(static_cast<int64_t>(j)); + res.push_back(static_cast<int64_t>(j << shift)); } } else { for (int i = 0; i <= imm_bits; i++) { @@ -853,18 +932,18 @@ class AssemblerTest : public AssemblerTestBase { ((((MinInt<int64_t>(kMaxBitsExhaustiveTest) - 1) - MinInt<int64_t>(imm_bits)) * i) / imm_bits); - res.push_back(static_cast<int64_t>(j)); + res.push_back(static_cast<int64_t>(j << shift)); } for (int64_t i = MinInt<int64_t>(kMaxBitsExhaustiveTest); i <= MaxInt<int64_t>(kMaxBitsExhaustiveTest); i++) { - res.push_back(static_cast<int64_t>(i)); + res.push_back(static_cast<int64_t>(i << shift)); } for (int i = 0; i <= imm_bits; i++) { int64_t j = (MaxInt<int64_t>(kMaxBitsExhaustiveTest) + 1) + ((MaxInt<int64_t>(imm_bits) - (MaxInt<int64_t>(kMaxBitsExhaustiveTest) + 1)) * i / imm_bits); - res.push_back(static_cast<int64_t>(j)); + res.push_back(static_cast<int64_t>(j << shift)); } } } @@ -1111,19 +1190,11 @@ class AssemblerTest : public AssemblerTestBase { } std::string base = fmt; - std::string addr_string = (this->*GetAName)(addr); - size_t addr_index; - if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) { - base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string); - } + ReplaceAddr((this->*GetAName)(addr), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } - // Add a newline at the end. - str += "\n"; return str; } @@ -1144,34 +1215,19 @@ class AssemblerTest : public AssemblerTestBase { } std::string base = fmt; - std::string addr_string = (this->*GetAName)(addr); - size_t addr_index; - if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) { - base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string); - } - - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceAddr((this->*GetAName)(addr), &base); + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename RegType, typename AddrType> std::string RepeatTemplatedRegMem(void (Ass::*f)(RegType, const AddrType&), - const std::vector<RegType*> registers, + ArrayRef<const RegType> registers, const std::vector<AddrType> addresses, std::string (AssemblerTest::*GetRName)(const RegType&), std::string (AssemblerTest::*GetAName)(const AddrType&), @@ -1181,37 +1237,24 @@ class AssemblerTest : public AssemblerTestBase { for (auto reg : registers) { for (auto addr : addresses) { if (f != nullptr) { - (assembler_.get()->*f)(*reg, addr); + (assembler_.get()->*f)(reg, addr); } std::string base = fmt; - std::string reg_string = (this->*GetRName)(*reg); - size_t reg_index; - if ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } - - std::string addr_string = (this->*GetAName)(addr); - size_t addr_index; - if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) { - base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string); - } + ReplaceReg(REG_TOKEN, (this->*GetRName)(reg), &base); + ReplaceAddr((this->*GetAName)(addr), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. - str += "\n"; return str; } template <typename AddrType, typename RegType> std::string RepeatTemplatedMemReg(void (Ass::*f)(const AddrType&, RegType), const std::vector<AddrType> addresses, - const std::vector<RegType*> registers, + ArrayRef<const RegType> registers, std::string (AssemblerTest::*GetAName)(const AddrType&), std::string (AssemblerTest::*GetRName)(const RegType&), const std::string& fmt) { @@ -1220,30 +1263,17 @@ class AssemblerTest : public AssemblerTestBase { for (auto addr : addresses) { for (auto reg : registers) { if (f != nullptr) { - (assembler_.get()->*f)(addr, *reg); + (assembler_.get()->*f)(addr, reg); } std::string base = fmt; - std::string addr_string = (this->*GetAName)(addr); - size_t addr_index; - if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) { - base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string); - } + ReplaceAddr((this->*GetAName)(addr), &base); + ReplaceReg(REG_TOKEN, (this->*GetRName)(reg), &base); - std::string reg_string = (this->*GetRName)(*reg); - size_t reg_index; - if ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } - - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. - str += "\n"; return str; } @@ -1253,36 +1283,28 @@ class AssemblerTest : public AssemblerTestBase { template <typename RegType> std::string RepeatTemplatedRegister(void (Ass::*f)(RegType), - const std::vector<RegType*> registers, + ArrayRef<const RegType> registers, std::string (AssemblerTest::*GetName)(const RegType&), const std::string& fmt) { std::string str; for (auto reg : registers) { if (f != nullptr) { - (assembler_.get()->*f)(*reg); + (assembler_.get()->*f)(reg); } std::string base = fmt; - std::string reg_string = (this->*GetName)(*reg); - size_t reg_index; - if ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } + ReplaceReg(REG_TOKEN, (this->*GetName)(reg), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename Reg1, typename Reg2> std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), const std::string& fmt, @@ -1294,44 +1316,31 @@ class AssemblerTest : public AssemblerTestBase { for (auto reg2 : reg2_registers) { // Check if this register pair is on the exception list. If so, skip it. if (except != nullptr) { - const auto& pair = std::make_pair(*reg1, *reg2); + const auto& pair = std::make_pair(reg1, reg2); if (std::find(except->begin(), except->end(), pair) != except->end()) { continue; } } if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2); + (assembler_.get()->*f)(reg1, reg2); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. - str += "\n"; return str; } template <typename Reg1, typename Reg2> std::string RepeatTemplatedRegistersNoDupes(void (Ass::*f)(Reg1, Reg2), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), const std::string& fmt) { @@ -1342,38 +1351,25 @@ class AssemblerTest : public AssemblerTestBase { for (auto reg2 : reg2_registers) { if (reg1 == reg2) continue; if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2); + (assembler_.get()->*f)(reg1, reg2); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename Reg1, typename Reg2, typename Reg3> std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2, Reg3), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, - const std::vector<Reg3*> reg3_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + ArrayRef<const Reg3> reg3_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), std::string (AssemblerTest::*GetName3)(const Reg3&), @@ -1383,44 +1379,61 @@ class AssemblerTest : public AssemblerTestBase { for (auto reg2 : reg2_registers) { for (auto reg3 : reg3_registers) { if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2, *reg3); + (assembler_.get()->*f)(reg1, reg2, reg3); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceReg(REG3_TOKEN, (this->*GetName3)(reg3), &base); - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } + str += base; + str += "\n"; + } + } + } + return str; + } - std::string reg3_string = (this->*GetName3)(*reg3); - size_t reg3_index; - while ((reg3_index = base.find(REG3_TOKEN)) != std::string::npos) { - base.replace(reg3_index, ConstexprStrLen(REG3_TOKEN), reg3_string); - } + template <typename Reg1, typename Reg2, typename Reg3, typename Reg4> + std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2, Reg3, Reg4), + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + ArrayRef<const Reg3> reg3_registers, + ArrayRef<const Reg4> reg4_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), + std::string (AssemblerTest::*GetName3)(const Reg3&), + std::string (AssemblerTest::*GetName4)(const Reg4&), + const std::string& fmt) { + std::string str; + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (auto reg3 : reg3_registers) { + for (auto reg4 : reg4_registers) { + if (f != nullptr) { + (assembler_.get()->*f)(reg1, reg2, reg3, reg4); + } + std::string base = fmt; - if (str.size() > 0) { + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceReg(REG3_TOKEN, (this->*GetName3)(reg3), &base); + ReplaceReg(REG4_TOKEN, (this->*GetName4)(reg4), &base); + + str += base; str += "\n"; } - str += base; } } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename Reg1, typename Reg2> std::string RepeatTemplatedRegistersImm(void (Ass::*f)(Reg1, Reg2, const Imm&), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), size_t imm_bytes, @@ -1434,39 +1447,19 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { Imm new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2, new_imm); + (assembler_.get()->*f)(reg1, reg2, new_imm); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } - - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } } - // Add a newline at the end. - str += "\n"; return str; } @@ -1517,11 +1510,41 @@ class AssemblerTest : public AssemblerTestBase { } } + static void ReplaceReg(const std::string& reg_token, + const std::string& replacement, + /*inout*/ std::string* str) { + size_t reg_index; + while ((reg_index = str->find(reg_token)) != std::string::npos) { + str->replace(reg_index, reg_token.length(), replacement); + } + } + + static void ReplaceImm(int64_t imm, + int64_t bias, + int64_t multiplier, + /*inout*/ std::string* str) { + size_t imm_index = str->find(IMM_TOKEN); + if (imm_index != std::string::npos) { + std::ostringstream sreg; + sreg << imm * multiplier + bias; + std::string imm_string = sreg.str(); + str->replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + } + } + + static void ReplaceAddr(const std::string& replacement, /*inout*/ std::string* str) { + size_t addr_index; + if ((addr_index = str->find(ADDRESS_TOKEN)) != std::string::npos) { + str->replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), replacement); + } + } + static constexpr const char* ADDRESS_TOKEN = "{mem}"; static constexpr const char* REG_TOKEN = "{reg}"; static constexpr const char* REG1_TOKEN = "{reg1}"; static constexpr const char* REG2_TOKEN = "{reg2}"; static constexpr const char* REG3_TOKEN = "{reg3}"; + static constexpr const char* REG4_TOKEN = "{reg4}"; static constexpr const char* IMM_TOKEN = "{imm}"; private: @@ -1529,7 +1552,7 @@ class AssemblerTest : public AssemblerTestBase { std::string RepeatRegisterImm(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) { - const std::vector<Reg*> registers = GetRegisters(); + ArrayRef<const Reg> registers = GetRegisters(); std::string str; std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); @@ -1539,45 +1562,29 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { Imm new_imm = 
CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg, new_imm); + (assembler_.get()->*f)(reg, new_imm); } std::string base = fmt; - std::string reg_string = GetRegName<kRegView>(*reg); - size_t reg_index; - while ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } + ReplaceReg(REG_TOKEN, GetRegName<kRegView>(reg), &base); + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } - - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. - str += "\n"; return str; } // Override this to pad the code with NOPs to a certain size if needed. - virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) { - } + virtual void Pad([[maybe_unused]] std::vector<uint8_t>& data) {} void DriverWrapper(const std::string& assembly_text, const std::string& test_name) { assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); - assembler_->FinalizeInstructions(code); + assembler_->CopyInstructions(code); Pad(*data); Driver(*data, assembly_text, test_name); } diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h index 73f3657413..6f836d3718 100644 --- a/compiler/utils/assembler_test_base.h +++ b/compiler/utils/assembler_test_base.h @@ -141,6 +141,16 @@ class AssemblerTestBase : public testing::Test { virtual std::vector<std::string> GetAssemblerCommand() { InstructionSet isa = GetIsa(); switch (isa) { + case InstructionSet::kRiscv64: + // TODO(riscv64): Support compression (RV32C) in assembler and tests (add `c` to `-march=`). + return {FindTool("clang"), + "--compile", + "-target", + "riscv64-linux-gnu", + "-march=rv64imafd_zba_zbb", + // Force the assembler to fully emit branch instructions instead of leaving + // offsets unresolved with relocation information for the linker. + "-mno-relax"}; case InstructionSet::kX86: return {FindTool("clang"), "--compile", "-target", "i386-linux-gnu"}; case InstructionSet::kX86_64: @@ -159,6 +169,15 @@ class AssemblerTestBase : public testing::Test { "--no-print-imm-hex", "--triple", "thumbv7a-linux-gnueabi"}; + case InstructionSet::kRiscv64: + return {FindTool("llvm-objdump"), + "--disassemble", + "--no-print-imm-hex", + "--no-show-raw-insn", + // Disassemble Standard Extensions supported by the assembler. 
+ "--mattr=+F,+D,+A,+Zba,+Zbb", + "-M", + "no-aliases"}; default: return { FindTool("llvm-objdump"), "--disassemble", "--no-print-imm-hex", "--no-show-raw-insn"}; diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 672cd3d10f..53cb3d6f8e 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -79,7 +79,7 @@ class ArmVIXLAssemblerTest : public AssemblerTestBase { size_t cs = __ CodeSize(); std::vector<uint8_t> managed_code(cs); MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); + __ CopyInstructions(code); DumpAndCheck(managed_code, testname, expected); } diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index aea7f14762..6e0048eb20 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -76,7 +76,7 @@ const char* const VixlJniHelpersResults = { " e4: f1bb 0f00 cmp.w r11, #0\n" " e8: bf18 it ne\n" " ea: 46e3 movne r11, r12\n" - " ec: f8d9 c09c ldr.w r12, [r9, #156]\n" + " ec: f8d9 c094 ldr.w r12, [r9, #148]\n" " f0: f1bc 0f00 cmp.w r12, #0\n" " f4: d16f bne 0x1d6 @ imm = #222\n" " f6: f8cd c7ff str.w r12, [sp, #2047]\n" @@ -151,10 +151,10 @@ const char* const VixlJniHelpersResults = { " 206: b001 add sp, #4\n" " 208: ecbd 8a10 vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}\n" " 20c: e8bd 4de0 pop.w {r5, r6, r7, r8, r10, r11, lr}\n" - " 210: f8d9 8024 ldr.w r8, [r9, #36]\n" + " 210: f8d9 8020 ldr.w r8, [r9, #32]\n" " 214: 4770 bx lr\n" - " 216: f8d9 009c ldr.w r0, [r9, #156]\n" - " 21a: f8d9 e2d0 ldr.w lr, [r9, #720]\n" + " 216: f8d9 0094 ldr.w r0, [r9, #148]\n" + " 21a: f8d9 e2c4 ldr.w lr, [r9, #708]\n" " 21e: 47f0 blx lr\n" }; diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc index 8b47b38e63..7a90a46f51 100644 --- a/compiler/utils/jni_macro_assembler.cc +++ b/compiler/utils/jni_macro_assembler.cc @@ -25,6 +25,9 @@ #ifdef ART_ENABLE_CODEGEN_arm64 #include "arm64/jni_macro_assembler_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +#include "riscv64/jni_macro_assembler_riscv64.h" +#endif #ifdef ART_ENABLE_CODEGEN_x86 #include "x86/jni_macro_assembler_x86.h" #endif @@ -34,6 +37,8 @@ #include "base/casts.h" #include "base/globals.h" #include "base/memory_region.h" +#include "gc_root.h" +#include "stack_reference.h" namespace art HIDDEN { @@ -79,6 +84,10 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create( case InstructionSet::kArm64: return MacroAsm64UniquePtr(new (allocator) arm64::Arm64JNIMacroAssembler(allocator)); #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case InstructionSet::kRiscv64: + return MacroAsm64UniquePtr(new (allocator) riscv64::Riscv64JNIMacroAssembler(allocator)); +#endif #ifdef ART_ENABLE_CODEGEN_x86_64 case InstructionSet::kX86_64: return MacroAsm64UniquePtr(new (allocator) x86_64::X86_64JNIMacroAssembler(allocator)); @@ -90,4 +99,34 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create( } } +template <PointerSize kPointerSize> +void JNIMacroAssembler<kPointerSize>::LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs) { + static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>)); + Load(dest, base, offs, sizeof(uint32_t)); +} + +template +void JNIMacroAssembler<PointerSize::k32>::LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister 
base, + MemberOffset offs); +template +void JNIMacroAssembler<PointerSize::k64>::LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs); + +template <PointerSize kPointerSize> +void JNIMacroAssembler<kPointerSize>::LoadStackReference(ManagedRegister dest, FrameOffset offs) { + static_assert(sizeof(uint32_t) == sizeof(StackReference<mirror::Object>)); + Load(dest, offs, sizeof(uint32_t)); +} + +template +void JNIMacroAssembler<PointerSize::k32>::LoadStackReference(ManagedRegister dest, + FrameOffset offs); +template +void JNIMacroAssembler<PointerSize::k64>::LoadStackReference(ManagedRegister dest, + FrameOffset offs); + } // namespace art diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h index 0c729705dc..2d52eada08 100644 --- a/compiler/utils/jni_macro_assembler.h +++ b/compiler/utils/jni_macro_assembler.h @@ -92,7 +92,7 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> { virtual size_t CodeSize() const = 0; // Copy instructions out of assembly buffer into the given region of memory - virtual void FinalizeInstructions(const MemoryRegion& region) = 0; + virtual void CopyInstructions(const MemoryRegion& region) = 0; // Emit code that will create an activation on the stack virtual void BuildFrame(size_t frame_size, @@ -129,9 +129,18 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> { // Load routines virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0; virtual void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) = 0; - virtual void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset<kPointerSize> offs) = 0; + // Load reference from a `GcRoot<>`. The default is to load as `jint`. Some architectures + // (say, RISC-V) override this to provide a different sign- or zero-extension. + virtual void LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs); + + // Load reference from a `StackReference<>`. The default is to load as `jint`. Some architectures + // (say, RISC-V) override this to provide a different sign- or zero-extension. + virtual void LoadStackReference(ManagedRegister dest, FrameOffset offs); + // Copying routines // Move arguments from `srcs` locations to `dests` locations. @@ -266,8 +275,8 @@ class JNIMacroAssemblerFwd : public JNIMacroAssembler<kPointerSize> { return asm_.CodeSize(); } - void FinalizeInstructions(const MemoryRegion& region) override { - asm_.FinalizeInstructions(region); + void CopyInstructions(const MemoryRegion& region) override { + asm_.CopyInstructions(region); } DebugFrameOpCodeWriterForAssembler& cfi() override { diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h index ac8e7d3010..ff182e6146 100644 --- a/compiler/utils/jni_macro_assembler_test.h +++ b/compiler/utils/jni_macro_assembler_test.h @@ -77,15 +77,14 @@ class JNIMacroAssemblerTest : public AssemblerTestBase { private: // Override this to pad the code with NOPs to a certain size if needed. 
- virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) { - } + virtual void Pad([[maybe_unused]] std::vector<uint8_t>& data) {} void DriverWrapper(const std::string& assembly_text, const std::string& test_name) { assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); - assembler_->FinalizeInstructions(code); + assembler_->CopyInstructions(code); Pad(*data); Driver(*data, assembly_text, test_name); } diff --git a/compiler/utils/label.h b/compiler/utils/label.h index 0368d90a26..25bf01376b 100644 --- a/compiler/utils/label.h +++ b/compiler/utils/label.h @@ -31,6 +31,10 @@ class AssemblerFixup; namespace arm64 { class Arm64Assembler; } // namespace arm64 +namespace riscv64 { +class Riscv64Assembler; +class Riscv64Label; +} // namespace riscv64 namespace x86 { class X86Assembler; class NearLabel; @@ -109,6 +113,8 @@ class Label { } friend class arm64::Arm64Assembler; + friend class riscv64::Riscv64Assembler; + friend class riscv64::Riscv64Label; friend class x86::X86Assembler; friend class x86::NearLabel; friend class x86_64::X86_64Assembler; diff --git a/compiler/utils/riscv64/assembler_riscv64.cc b/compiler/utils/riscv64/assembler_riscv64.cc new file mode 100644 index 0000000000..089bc5dfe6 --- /dev/null +++ b/compiler/utils/riscv64/assembler_riscv64.cc @@ -0,0 +1,2422 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "assembler_riscv64.h" + +#include "base/bit_utils.h" +#include "base/casts.h" +#include "base/logging.h" +#include "base/memory_region.h" + +namespace art HIDDEN { +namespace riscv64 { + +static_assert(static_cast<size_t>(kRiscv64PointerSize) == kRiscv64DoublewordSize, + "Unexpected Riscv64 pointer size."); +static_assert(kRiscv64PointerSize == PointerSize::k64, "Unexpected Riscv64 pointer size."); + +// Split 32-bit offset into an `imm20` for LUI/AUIPC and +// a signed 12-bit short offset for ADDI/JALR/etc. +ALWAYS_INLINE static inline std::pair<uint32_t, int32_t> SplitOffset(int32_t offset) { + // The highest 0x800 values are out of range. + DCHECK_LT(offset, 0x7ffff800); + // Round `offset` to nearest 4KiB offset because short offset has range [-0x800, 0x800). + int32_t near_offset = (offset + 0x800) & ~0xfff; + // Calculate the short offset. + int32_t short_offset = offset - near_offset; + DCHECK(IsInt<12>(short_offset)); + // Extract the `imm20`. + uint32_t imm20 = static_cast<uint32_t>(near_offset) >> 12; + // Return the result as a pair. 
+ return std::make_pair(imm20, short_offset); +} + +ALWAYS_INLINE static inline int32_t ToInt12(uint32_t uint12) { + DCHECK(IsUint<12>(uint12)); + return static_cast<int32_t>(uint12 - ((uint12 & 0x800) << 1)); +} + +void Riscv64Assembler::FinalizeCode() { + CHECK(!finalized_); + Assembler::FinalizeCode(); + ReserveJumpTableSpace(); + EmitLiterals(); + PromoteBranches(); + EmitBranches(); + EmitJumpTables(); + PatchCFI(); + finalized_ = true; +} + +void Riscv64Assembler::Emit(uint32_t value) { + if (overwriting_) { + // Branches to labels are emitted into their placeholders here. + buffer_.Store<uint32_t>(overwrite_location_, value); + overwrite_location_ += sizeof(uint32_t); + } else { + // Other instructions are simply appended at the end here. + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + buffer_.Emit<uint32_t>(value); + } +} + +/////////////////////////////// RV64 VARIANTS extension /////////////////////////////// + +//////////////////////////////// RV64 "I" Instructions //////////////////////////////// + +// LUI/AUIPC (RV32I, with sign-extension on RV64I), opcode = 0x17, 0x37 + +void Riscv64Assembler::Lui(XRegister rd, uint32_t imm20) { + EmitU(imm20, rd, 0x37); +} + +void Riscv64Assembler::Auipc(XRegister rd, uint32_t imm20) { + EmitU(imm20, rd, 0x17); +} + +// Jump instructions (RV32I), opcode = 0x67, 0x6f + +void Riscv64Assembler::Jal(XRegister rd, int32_t offset) { + EmitJ(offset, rd, 0x6F); +} + +void Riscv64Assembler::Jalr(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x0, rd, 0x67); +} + +// Branch instructions, opcode = 0x63 (subfunc from 0x0 ~ 0x7), 0x67, 0x6f + +void Riscv64Assembler::Beq(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x0, 0x63); +} + +void Riscv64Assembler::Bne(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x1, 0x63); +} + +void Riscv64Assembler::Blt(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x4, 0x63); +} + +void Riscv64Assembler::Bge(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x5, 0x63); +} + +void Riscv64Assembler::Bltu(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x6, 0x63); +} + +void Riscv64Assembler::Bgeu(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x7, 0x63); +} + +// Load instructions (RV32I+RV64I): opcode = 0x03, funct3 from 0x0 ~ 0x6 + +void Riscv64Assembler::Lb(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x0, rd, 0x03); +} + +void Riscv64Assembler::Lh(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x1, rd, 0x03); +} + +void Riscv64Assembler::Lw(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x2, rd, 0x03); +} + +void Riscv64Assembler::Ld(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x3, rd, 0x03); +} + +void Riscv64Assembler::Lbu(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x4, rd, 0x03); +} + +void Riscv64Assembler::Lhu(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x5, rd, 0x03); +} + +void Riscv64Assembler::Lwu(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x6, rd, 0x3); +} + +// Store instructions (RV32I+RV64I): opcode = 0x23, funct3 from 0x0 ~ 0x3 + +void Riscv64Assembler::Sb(XRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x0, 0x23); +} + +void Riscv64Assembler::Sh(XRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x1, 0x23); +} + 
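+// Note that S-type stores, unlike I-type loads, split the 12-bit immediate across
+// two fields so that rs1 and rs2 keep their usual bit positions. A minimal sketch
+// of the expected encoding (the actual `EmitS()` helper lives in the header and
+// may differ; the field packing below follows the RISC-V base ISA):
+//
+//   uint32_t imm = static_cast<uint32_t>(offset) & 0xfff;
+//   uint32_t inst = ((imm >> 5) << 25) | (rs2 << 20) | (rs1 << 15) |
+//                   (funct3 << 12) | ((imm & 0x1f) << 7) | opcode;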
+void Riscv64Assembler::Sw(XRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x2, 0x23); +} + +void Riscv64Assembler::Sd(XRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x3, 0x23); +} + +// IMM ALU instructions (RV32I): opcode = 0x13, funct3 from 0x0 ~ 0x7 + +void Riscv64Assembler::Addi(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x0, rd, 0x13); +} + +void Riscv64Assembler::Slti(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x2, rd, 0x13); +} + +void Riscv64Assembler::Sltiu(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x3, rd, 0x13); +} + +void Riscv64Assembler::Xori(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x4, rd, 0x13); +} + +void Riscv64Assembler::Ori(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x6, rd, 0x13); +} + +void Riscv64Assembler::Andi(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x7, rd, 0x13); +} + +// 0x1 Split: 0x0(6b) + imm12(6b) +void Riscv64Assembler::Slli(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 64u); + EmitI6(0x0, shamt, rs1, 0x1, rd, 0x13); +} + +// 0x5 Split: 0x0(6b) + imm12(6b) +void Riscv64Assembler::Srli(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 64u); + EmitI6(0x0, shamt, rs1, 0x5, rd, 0x13); +} + +// 0x5 Split: 0x10(6b) + imm12(6b) +void Riscv64Assembler::Srai(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 64u); + EmitI6(0x10, shamt, rs1, 0x5, rd, 0x13); +} + +// ALU instructions (RV32I): opcode = 0x33, funct3 from 0x0 ~ 0x7 + +void Riscv64Assembler::Add(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x0, rd, 0x33); +} + +void Riscv64Assembler::Sub(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x0, rd, 0x33); +} + +void Riscv64Assembler::Slt(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x02, rd, 0x33); +} + +void Riscv64Assembler::Sltu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x03, rd, 0x33); +} + +void Riscv64Assembler::Xor(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x04, rd, 0x33); +} + +void Riscv64Assembler::Or(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x06, rd, 0x33); +} + +void Riscv64Assembler::And(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x07, rd, 0x33); +} + +void Riscv64Assembler::Sll(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x01, rd, 0x33); +} + +void Riscv64Assembler::Srl(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x05, rd, 0x33); +} + +void Riscv64Assembler::Sra(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x05, rd, 0x33); +} + +// 32bit Imm ALU instructions (RV64I): opcode = 0x1b, funct3 from 0x0, 0x1, 0x5 + +void Riscv64Assembler::Addiw(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x0, rd, 0x1b); +} + +void Riscv64Assembler::Slliw(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 32u); + EmitR(0x0, shamt, rs1, 0x1, rd, 0x1b); +} + +void Riscv64Assembler::Srliw(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 32u); + EmitR(0x0, shamt, rs1, 0x5, rd, 0x1b); +} + +void Riscv64Assembler::Sraiw(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 32u); + EmitR(0x20, 
shamt, rs1, 0x5, rd, 0x1b); +} + +// 32bit ALU instructions (RV64I): opcode = 0x3b, funct3 from 0x0 ~ 0x7 + +void Riscv64Assembler::Addw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x0, rd, 0x3b); +} + +void Riscv64Assembler::Subw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x0, rd, 0x3b); +} + +void Riscv64Assembler::Sllw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x1, rd, 0x3b); +} + +void Riscv64Assembler::Srlw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x5, rd, 0x3b); +} + +void Riscv64Assembler::Sraw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x5, rd, 0x3b); +} + +// Environment call and breakpoint (RV32I), opcode = 0x73 + +void Riscv64Assembler::Ecall() { EmitI(0x0, 0x0, 0x0, 0x0, 0x73); } + +void Riscv64Assembler::Ebreak() { EmitI(0x1, 0x0, 0x0, 0x0, 0x73); } + +// Fence instruction (RV32I): opcode = 0xf, funct3 = 0 + +void Riscv64Assembler::Fence(uint32_t pred, uint32_t succ) { + DCHECK(IsUint<4>(pred)); + DCHECK(IsUint<4>(succ)); + EmitI(/* normal fence */ 0x0 << 8 | pred << 4 | succ, 0x0, 0x0, 0x0, 0xf); +} + +void Riscv64Assembler::FenceTso() { + static constexpr uint32_t kPred = kFenceWrite | kFenceRead; + static constexpr uint32_t kSucc = kFenceWrite | kFenceRead; + EmitI(ToInt12(/* TSO fence */ 0x8 << 8 | kPred << 4 | kSucc), 0x0, 0x0, 0x0, 0xf); +} + +//////////////////////////////// RV64 "I" Instructions END //////////////////////////////// + +/////////////////////////// RV64 "Zifencei" Instructions START //////////////////////////// + +// "Zifencei" Standard Extension, opcode = 0xf, funct3 = 1 +void Riscv64Assembler::FenceI() { EmitI(0x0, 0x0, 0x1, 0x0, 0xf); } + +//////////////////////////// RV64 "Zifencei" Instructions END ///////////////////////////// + +/////////////////////////////// RV64 "M" Instructions START /////////////////////////////// + +// RV32M Standard Extension: opcode = 0x33, funct3 from 0x0 ~ 0x7 + +void Riscv64Assembler::Mul(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x0, rd, 0x33); +} + +void Riscv64Assembler::Mulh(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x1, rd, 0x33); +} + +void Riscv64Assembler::Mulhsu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x2, rd, 0x33); +} + +void Riscv64Assembler::Mulhu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x3, rd, 0x33); +} + +void Riscv64Assembler::Div(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x4, rd, 0x33); +} + +void Riscv64Assembler::Divu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x5, rd, 0x33); +} + +void Riscv64Assembler::Rem(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x6, rd, 0x33); +} + +void Riscv64Assembler::Remu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x7, rd, 0x33); +} + +// RV64M Standard Extension: opcode = 0x3b, funct3 0x0 and from 0x4 ~ 0x7 + +void Riscv64Assembler::Mulw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x0, rd, 0x3b); +} + +void Riscv64Assembler::Divw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x4, rd, 0x3b); +} + +void Riscv64Assembler::Divuw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x5, rd, 0x3b); +} + +void Riscv64Assembler::Remw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x6, rd, 0x3b); +} + +void Riscv64Assembler::Remuw(XRegister rd, 
XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x7, rd, 0x3b); +} + +//////////////////////////////// RV64 "M" Instructions END //////////////////////////////// + +/////////////////////////////// RV64 "A" Instructions START /////////////////////////////// + +void Riscv64Assembler::LrW(XRegister rd, XRegister rs1, AqRl aqrl) { + CHECK(aqrl != AqRl::kRelease); + EmitR4(0x2, enum_cast<uint32_t>(aqrl), 0x0, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::LrD(XRegister rd, XRegister rs1, AqRl aqrl) { + CHECK(aqrl != AqRl::kRelease); + EmitR4(0x2, enum_cast<uint32_t>(aqrl), 0x0, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::ScW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + CHECK(aqrl != AqRl::kAcquire); + EmitR4(0x3, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::ScD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + CHECK(aqrl != AqRl::kAcquire); + EmitR4(0x3, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoSwapW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x1, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoSwapD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x1, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoAddW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x0, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoAddD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x0, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoXorW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x4, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoXorD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x4, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoAndW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0xc, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoAndD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0xc, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoOrW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x8, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoOrD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x8, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoMinW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x10, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoMinD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x10, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoMaxW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x14, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoMaxD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x14, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoMinuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x18, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoMinuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x18, enum_cast<uint32_t>(aqrl), rs2, 
rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoMaxuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x1c, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoMaxuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x1c, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +/////////////////////////////// RV64 "A" Instructions END /////////////////////////////// + +///////////////////////////// RV64 "Zicsr" Instructions START ///////////////////////////// + +// "Zicsr" Standard Extension, opcode = 0x73, funct3 from 0x1 ~ 0x3 and 0x5 ~ 0x7 + +void Riscv64Assembler::Csrrw(XRegister rd, uint32_t csr, XRegister rs1) { + EmitI(ToInt12(csr), rs1, 0x1, rd, 0x73); +} + +void Riscv64Assembler::Csrrs(XRegister rd, uint32_t csr, XRegister rs1) { + EmitI(ToInt12(csr), rs1, 0x2, rd, 0x73); +} + +void Riscv64Assembler::Csrrc(XRegister rd, uint32_t csr, XRegister rs1) { + EmitI(ToInt12(csr), rs1, 0x3, rd, 0x73); +} + +void Riscv64Assembler::Csrrwi(XRegister rd, uint32_t csr, uint32_t uimm5) { + EmitI(ToInt12(csr), uimm5, 0x5, rd, 0x73); +} + +void Riscv64Assembler::Csrrsi(XRegister rd, uint32_t csr, uint32_t uimm5) { + EmitI(ToInt12(csr), uimm5, 0x6, rd, 0x73); +} + +void Riscv64Assembler::Csrrci(XRegister rd, uint32_t csr, uint32_t uimm5) { + EmitI(ToInt12(csr), uimm5, 0x7, rd, 0x73); +} + +////////////////////////////// RV64 "Zicsr" Instructions END ////////////////////////////// + +/////////////////////////////// RV64 "FD" Instructions START /////////////////////////////// + +// FP load/store instructions (RV32F+RV32D): opcode = 0x07, 0x27 + +void Riscv64Assembler::FLw(FRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x2, rd, 0x07); +} + +void Riscv64Assembler::FLd(FRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x3, rd, 0x07); +} + +void Riscv64Assembler::FSw(FRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x2, 0x27); +} + +void Riscv64Assembler::FSd(FRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x3, 0x27); +} + +// FP FMA instructions (RV32F+RV32D): opcode = 0x43, 0x47, 0x4b, 0x4f + +void Riscv64Assembler::FMAddS( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x43); +} + +void Riscv64Assembler::FMAddD( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x43); +} + +void Riscv64Assembler::FMSubS( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x47); +} + +void Riscv64Assembler::FMSubD( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x47); +} + +void Riscv64Assembler::FNMSubS( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4b); +} + +void Riscv64Assembler::FNMSubD( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4b); +} + +void Riscv64Assembler::FNMAddS( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4f); +} + +void Riscv64Assembler::FNMAddD( + FRegister rd, FRegister rs1, 
FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4f); +} + +// Simple FP instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b0XXXX0D + +void Riscv64Assembler::FAddS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FAddD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSubS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x4, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSubD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x5, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FMulS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x8, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FMulD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x9, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FDivS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0xc, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FDivD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0xd, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSqrtS(FRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x2c, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSqrtD(FRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x2d, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSgnjS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x10, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FSgnjD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x11, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FSgnjnS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x10, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FSgnjnD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x11, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FSgnjxS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x10, rs2, rs1, 0x2, rd, 0x53); +} + +void Riscv64Assembler::FSgnjxD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x11, rs2, rs1, 0x2, rd, 0x53); +} + +void Riscv64Assembler::FMinS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x14, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMinD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x15, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMaxS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x14, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FMaxD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x15, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FCvtSD(FRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x20, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDS(FRegister rd, FRegister rs1, FPRoundingMode frm) { + // Note: The `frm` is useless, the result can represent every value of the source exactly. 
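+ // (Every binary32 value is exactly representable in binary64, so no rounding
+ // can occur and the rounding mode is never consulted.)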
+ EmitR(0x21, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +// FP compare instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b101000D + +void Riscv64Assembler::FEqS(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x50, rs2, rs1, 0x2, rd, 0x53); +} + +void Riscv64Assembler::FEqD(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x51, rs2, rs1, 0x2, rd, 0x53); +} + +void Riscv64Assembler::FLtS(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x50, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FLtD(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x51, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FLeS(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x50, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FLeD(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x51, rs2, rs1, 0x0, rd, 0x53); +} + +// FP conversion instructions (RV32F+RV32D+RV64F+RV64D): opcode = 0x53, funct7 = 0b110X00D + +void Riscv64Assembler::FCvtWS(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x60, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtWD(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x61, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtWuS(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x60, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtWuD(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x61, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtLS(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x60, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtLD(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x61, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtLuS(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x60, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtLuD(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x61, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtSW(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x68, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDW(FRegister rd, XRegister rs1, FPRoundingMode frm) { + // Note: The `frm` is useless, the result can represent every value of the source exactly. + EmitR(0x69, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtSWu(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x68, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDWu(FRegister rd, XRegister rs1, FPRoundingMode frm) { + // Note: The `frm` is useless, the result can represent every value of the source exactly. 
+ EmitR(0x69, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtSL(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x68, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDL(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x69, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtSLu(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x68, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDLu(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x69, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +// FP move instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x0, funct7 = 0b111X00D + +void Riscv64Assembler::FMvXW(XRegister rd, FRegister rs1) { + EmitR(0x70, 0x0, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMvXD(XRegister rd, FRegister rs1) { + EmitR(0x71, 0x0, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMvWX(FRegister rd, XRegister rs1) { + EmitR(0x78, 0x0, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMvDX(FRegister rd, XRegister rs1) { + EmitR(0x79, 0x0, rs1, 0x0, rd, 0x53); +} + +// FP classify instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x1, funct7 = 0b111X00D + +void Riscv64Assembler::FClassS(XRegister rd, FRegister rs1) { + EmitR(0x70, 0x0, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FClassD(XRegister rd, FRegister rs1) { + EmitR(0x71, 0x0, rs1, 0x1, rd, 0x53); +} + +/////////////////////////////// RV64 "FD" Instructions END /////////////////////////////// + +////////////////////////////// RV64 "Zba" Instructions START ///////////////////////////// + +void Riscv64Assembler::AddUw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x4, rs2, rs1, 0x0, rd, 0x3b); +} + +void Riscv64Assembler::Sh1Add(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x2, rd, 0x33); +} + +void Riscv64Assembler::Sh1AddUw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x2, rd, 0x3b); +} + +void Riscv64Assembler::Sh2Add(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x4, rd, 0x33); +} + +void Riscv64Assembler::Sh2AddUw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x4, rd, 0x3b); +} + +void Riscv64Assembler::Sh3Add(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x6, rd, 0x33); +} + +void Riscv64Assembler::Sh3AddUw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x6, rd, 0x3b); +} + +void Riscv64Assembler::SlliUw(XRegister rd, XRegister rs1, int32_t shamt) { + EmitI6(0x2, shamt, rs1, 0x1, rd, 0x1b); +} + +/////////////////////////////// RV64 "Zba" Instructions END ////////////////////////////// + +////////////////////////////// RV64 "Zbb" Instructions START ///////////////////////////// + +void Riscv64Assembler::Andn(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x7, rd, 0x33); +} + +void Riscv64Assembler::Orn(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x6, rd, 0x33); +} + +void Riscv64Assembler::Xnor(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x4, rd, 0x33); +} + +void Riscv64Assembler::Clz(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x0, rs1, 0x1, rd, 0x13); +} + +void Riscv64Assembler::Clzw(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x0, rs1, 0x1, rd, 0x1b); +} + +void Riscv64Assembler::Ctz(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x1, rs1, 0x1, rd, 0x13); +} + +void Riscv64Assembler::Ctzw(XRegister 
rd, XRegister rs1) { + EmitR(0x30, 0x1, rs1, 0x1, rd, 0x1b); +} + +void Riscv64Assembler::Cpop(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x2, rs1, 0x1, rd, 0x13); +} + +void Riscv64Assembler::Cpopw(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x2, rs1, 0x1, rd, 0x1b); +} + +void Riscv64Assembler::Min(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x5, rs2, rs1, 0x4, rd, 0x33); +} + +void Riscv64Assembler::Minu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x5, rs2, rs1, 0x5, rd, 0x33); +} + +void Riscv64Assembler::Max(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x5, rs2, rs1, 0x6, rd, 0x33); +} + +void Riscv64Assembler::Maxu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x5, rs2, rs1, 0x7, rd, 0x33); +} + +void Riscv64Assembler::Rol(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x30, rs2, rs1, 0x1, rd, 0x33); +} + +void Riscv64Assembler::Rolw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x30, rs2, rs1, 0x1, rd, 0x3b); +} + +void Riscv64Assembler::Ror(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x30, rs2, rs1, 0x5, rd, 0x33); +} + +void Riscv64Assembler::Rorw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x30, rs2, rs1, 0x5, rd, 0x3b); +} + +void Riscv64Assembler::Rori(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 64u); + EmitI6(0x18, shamt, rs1, 0x5, rd, 0x13); +} + +void Riscv64Assembler::Roriw(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 32u); + EmitI6(0x18, shamt, rs1, 0x5, rd, 0x1b); +} + +void Riscv64Assembler::OrcB(XRegister rd, XRegister rs1) { + EmitR(0x14, 0x7, rs1, 0x5, rd, 0x13); +} + +void Riscv64Assembler::Rev8(XRegister rd, XRegister rs1) { + EmitR(0x35, 0x18, rs1, 0x5, rd, 0x13); +} + +/////////////////////////////// RV64 "Zbb" Instructions END ////////////////////////////// + +////////////////////////////// RV64 MACRO Instructions START /////////////////////////////// + +// Pseudo instructions + +void Riscv64Assembler::Nop() { Addi(Zero, Zero, 0); } + +void Riscv64Assembler::Li(XRegister rd, int64_t imm) { + LoadImmediate(rd, imm, /*can_use_tmp=*/ false); +} + +void Riscv64Assembler::Mv(XRegister rd, XRegister rs) { Addi(rd, rs, 0); } + +void Riscv64Assembler::Not(XRegister rd, XRegister rs) { Xori(rd, rs, -1); } + +void Riscv64Assembler::Neg(XRegister rd, XRegister rs) { Sub(rd, Zero, rs); } + +void Riscv64Assembler::NegW(XRegister rd, XRegister rs) { Subw(rd, Zero, rs); } + +void Riscv64Assembler::SextB(XRegister rd, XRegister rs) { + Slli(rd, rs, kXlen - 8u); + Srai(rd, rd, kXlen - 8u); +} + +void Riscv64Assembler::SextH(XRegister rd, XRegister rs) { + Slli(rd, rs, kXlen - 16u); + Srai(rd, rd, kXlen - 16u); +} + +void Riscv64Assembler::SextW(XRegister rd, XRegister rs) { Addiw(rd, rs, 0); } + +void Riscv64Assembler::ZextB(XRegister rd, XRegister rs) { Andi(rd, rs, 0xff); } + +void Riscv64Assembler::ZextH(XRegister rd, XRegister rs) { + Slli(rd, rs, kXlen - 16u); + Srli(rd, rd, kXlen - 16u); +} + +void Riscv64Assembler::ZextW(XRegister rd, XRegister rs) { + // TODO(riscv64): Use the ZEXT.W alias for ADD.UW from the Zba extension. 
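+ // (With Zba guaranteed, `AddUw(rd, rs, Zero)` above would do this in a single
+ // instruction.)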
+ Slli(rd, rs, kXlen - 32u); + Srli(rd, rd, kXlen - 32u); +} + +void Riscv64Assembler::Seqz(XRegister rd, XRegister rs) { Sltiu(rd, rs, 1); } + +void Riscv64Assembler::Snez(XRegister rd, XRegister rs) { Sltu(rd, Zero, rs); } + +void Riscv64Assembler::Sltz(XRegister rd, XRegister rs) { Slt(rd, rs, Zero); } + +void Riscv64Assembler::Sgtz(XRegister rd, XRegister rs) { Slt(rd, Zero, rs); } + +void Riscv64Assembler::FMvS(FRegister rd, FRegister rs) { FSgnjS(rd, rs, rs); } + +void Riscv64Assembler::FAbsS(FRegister rd, FRegister rs) { FSgnjxS(rd, rs, rs); } + +void Riscv64Assembler::FNegS(FRegister rd, FRegister rs) { FSgnjnS(rd, rs, rs); } + +void Riscv64Assembler::FMvD(FRegister rd, FRegister rs) { FSgnjD(rd, rs, rs); } + +void Riscv64Assembler::FAbsD(FRegister rd, FRegister rs) { FSgnjxD(rd, rs, rs); } + +void Riscv64Assembler::FNegD(FRegister rd, FRegister rs) { FSgnjnD(rd, rs, rs); } + +void Riscv64Assembler::Beqz(XRegister rs, int32_t offset) { + Beq(rs, Zero, offset); +} + +void Riscv64Assembler::Bnez(XRegister rs, int32_t offset) { + Bne(rs, Zero, offset); +} + +void Riscv64Assembler::Blez(XRegister rt, int32_t offset) { + Bge(Zero, rt, offset); +} + +void Riscv64Assembler::Bgez(XRegister rt, int32_t offset) { + Bge(rt, Zero, offset); +} + +void Riscv64Assembler::Bltz(XRegister rt, int32_t offset) { + Blt(rt, Zero, offset); +} + +void Riscv64Assembler::Bgtz(XRegister rt, int32_t offset) { + Blt(Zero, rt, offset); +} + +void Riscv64Assembler::Bgt(XRegister rs, XRegister rt, int32_t offset) { + Blt(rt, rs, offset); +} + +void Riscv64Assembler::Ble(XRegister rs, XRegister rt, int32_t offset) { + Bge(rt, rs, offset); +} + +void Riscv64Assembler::Bgtu(XRegister rs, XRegister rt, int32_t offset) { + Bltu(rt, rs, offset); +} + +void Riscv64Assembler::Bleu(XRegister rs, XRegister rt, int32_t offset) { + Bgeu(rt, rs, offset); +} + +void Riscv64Assembler::J(int32_t offset) { Jal(Zero, offset); } + +void Riscv64Assembler::Jal(int32_t offset) { Jal(RA, offset); } + +void Riscv64Assembler::Jr(XRegister rs) { Jalr(Zero, rs, 0); } + +void Riscv64Assembler::Jalr(XRegister rs) { Jalr(RA, rs, 0); } + +void Riscv64Assembler::Jalr(XRegister rd, XRegister rs) { Jalr(rd, rs, 0); } + +void Riscv64Assembler::Ret() { Jalr(Zero, RA, 0); } + +void Riscv64Assembler::RdCycle(XRegister rd) { + Csrrs(rd, 0xc00, Zero); +} + +void Riscv64Assembler::RdTime(XRegister rd) { + Csrrs(rd, 0xc01, Zero); +} + +void Riscv64Assembler::RdInstret(XRegister rd) { + Csrrs(rd, 0xc02, Zero); +} + +void Riscv64Assembler::Csrr(XRegister rd, uint32_t csr) { + Csrrs(rd, csr, Zero); +} + +void Riscv64Assembler::Csrw(uint32_t csr, XRegister rs) { + Csrrw(Zero, csr, rs); +} + +void Riscv64Assembler::Csrs(uint32_t csr, XRegister rs) { + Csrrs(Zero, csr, rs); +} + +void Riscv64Assembler::Csrc(uint32_t csr, XRegister rs) { + Csrrc(Zero, csr, rs); +} + +void Riscv64Assembler::Csrwi(uint32_t csr, uint32_t uimm5) { + Csrrwi(Zero, csr, uimm5); +} + +void Riscv64Assembler::Csrsi(uint32_t csr, uint32_t uimm5) { + Csrrsi(Zero, csr, uimm5); +} + +void Riscv64Assembler::Csrci(uint32_t csr, uint32_t uimm5) { + Csrrci(Zero, csr, uimm5); +} + +void Riscv64Assembler::Loadb(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lb>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadh(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lh>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadw(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lw>(rd, rs1, offset); +} + 
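+// `LoadFromOffset<>()` is defined in the header; for offsets outside the 12-bit
+// immediate range it is expected to split the offset first, roughly as in the
+// following sketch (illustrative only, reusing `SplitOffset()` from this file;
+// `load` stands for the member-function template argument):
+//
+//   if (IsInt<12>(offset)) {
+//     (this->*load)(rd, rs1, offset);
+//   } else {
+//     ScratchRegisterScope srs(this);
+//     XRegister tmp = srs.AllocateXRegister();
+//     auto [imm20, short_offset] = SplitOffset(offset);
+//     Lui(tmp, imm20);
+//     Add(tmp, tmp, rs1);
+//     (this->*load)(rd, tmp, short_offset);
+//   }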
+void Riscv64Assembler::Loadd(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Ld>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadbu(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lbu>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadhu(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lhu>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadwu(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lwu>(rd, rs1, offset); +} + +void Riscv64Assembler::Storeb(XRegister rs2, XRegister rs1, int32_t offset) { + StoreToOffset<&Riscv64Assembler::Sb>(rs2, rs1, offset); +} + +void Riscv64Assembler::Storeh(XRegister rs2, XRegister rs1, int32_t offset) { + StoreToOffset<&Riscv64Assembler::Sh>(rs2, rs1, offset); +} + +void Riscv64Assembler::Storew(XRegister rs2, XRegister rs1, int32_t offset) { + StoreToOffset<&Riscv64Assembler::Sw>(rs2, rs1, offset); +} + +void Riscv64Assembler::Stored(XRegister rs2, XRegister rs1, int32_t offset) { + StoreToOffset<&Riscv64Assembler::Sd>(rs2, rs1, offset); +} + +void Riscv64Assembler::FLoadw(FRegister rd, XRegister rs1, int32_t offset) { + FLoadFromOffset<&Riscv64Assembler::FLw>(rd, rs1, offset); +} + +void Riscv64Assembler::FLoadd(FRegister rd, XRegister rs1, int32_t offset) { + FLoadFromOffset<&Riscv64Assembler::FLd>(rd, rs1, offset); +} + +void Riscv64Assembler::FStorew(FRegister rs2, XRegister rs1, int32_t offset) { + FStoreToOffset<&Riscv64Assembler::FSw>(rs2, rs1, offset); +} + +void Riscv64Assembler::FStored(FRegister rs2, XRegister rs1, int32_t offset) { + FStoreToOffset<&Riscv64Assembler::FSd>(rs2, rs1, offset); +} + +void Riscv64Assembler::LoadConst32(XRegister rd, int32_t value) { + // No need to use a temporary register for 32-bit values. + LoadImmediate(rd, value, /*can_use_tmp=*/ false); +} + +void Riscv64Assembler::LoadConst64(XRegister rd, int64_t value) { + LoadImmediate(rd, value, /*can_use_tmp=*/ true); +} + +template <typename ValueType, typename Addi, typename AddLarge> +void AddConstImpl(Riscv64Assembler* assembler, + XRegister rd, + XRegister rs1, + ValueType value, + Addi&& addi, + AddLarge&& add_large) { + ScratchRegisterScope srs(assembler); + // A temporary must be available for adjustment even if it's not needed. + // However, `rd` can be used as the temporary unless it's the same as `rs1` or SP. 
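+ // (The SP exclusion is presumably because the stack pointer must stay valid at
+ // every instruction boundary, so it cannot hold a transient partial sum.)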
+ DCHECK_IMPLIES(rd == rs1 || rd == SP, srs.AvailableXRegisters() != 0u);
+
+ if (IsInt<12>(value)) {
+ addi(rd, rs1, value);
+ return;
+ }
+
+ constexpr int32_t kPositiveValueSimpleAdjustment = 0x7ff;
+ constexpr int32_t kHighestValueForSimpleAdjustment = 2 * kPositiveValueSimpleAdjustment;
+ constexpr int32_t kNegativeValueSimpleAdjustment = -0x800;
+ constexpr int32_t kLowestValueForSimpleAdjustment = 2 * kNegativeValueSimpleAdjustment;
+
+ if (rd != rs1 && rd != SP) {
+ srs.IncludeXRegister(rd);
+ }
+ XRegister tmp = srs.AllocateXRegister();
+ if (value >= 0 && value <= kHighestValueForSimpleAdjustment) {
+ addi(tmp, rs1, kPositiveValueSimpleAdjustment);
+ addi(rd, tmp, value - kPositiveValueSimpleAdjustment);
+ } else if (value < 0 && value >= kLowestValueForSimpleAdjustment) {
+ addi(tmp, rs1, kNegativeValueSimpleAdjustment);
+ addi(rd, tmp, value - kNegativeValueSimpleAdjustment);
+ } else {
+ add_large(rd, rs1, value, tmp);
+ }
+}
+
+void Riscv64Assembler::AddConst32(XRegister rd, XRegister rs1, int32_t value) {
+ CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u);
+ CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u);
+ auto addiw = [&](XRegister rd, XRegister rs1, int32_t value) { Addiw(rd, rs1, value); };
+ auto add_large = [&](XRegister rd, XRegister rs1, int32_t value, XRegister tmp) {
+ LoadConst32(tmp, value);
+ Addw(rd, rs1, tmp);
+ };
+ AddConstImpl(this, rd, rs1, value, addiw, add_large);
+}
+
+void Riscv64Assembler::AddConst64(XRegister rd, XRegister rs1, int64_t value) {
+ CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u);
+ CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u);
+ auto addi = [&](XRegister rd, XRegister rs1, int32_t value) { Addi(rd, rs1, value); };
+ auto add_large = [&](XRegister rd, XRegister rs1, int64_t value, XRegister tmp) {
+ // We may not have another scratch register for `LoadConst64()`, so use `Li()`.
+ // TODO(riscv64): Refactor `LoadImmediate()` so that we can reuse the code to detect
+ // when the code path using the scratch reg is beneficial, and use that path with a
+ // small modification - instead of adding the two parts together, add them individually
+ // to the input `rs1`. (This works as long as `rd` is not the same as `tmp`.)
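+ // A hypothetical shape for that refactoring when the value fits in 32 bits:
+ // `Lui(tmp, imm20); Add(rd, rs1, tmp); Addi(rd, rd, short_offset);`.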
+ Li(tmp, value); + Add(rd, rs1, tmp); + }; + AddConstImpl(this, rd, rs1, value, addi, add_large); +} + +void Riscv64Assembler::Beqz(XRegister rs, Riscv64Label* label, bool is_bare) { + Beq(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bnez(XRegister rs, Riscv64Label* label, bool is_bare) { + Bne(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Blez(XRegister rs, Riscv64Label* label, bool is_bare) { + Ble(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bgez(XRegister rs, Riscv64Label* label, bool is_bare) { + Bge(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bltz(XRegister rs, Riscv64Label* label, bool is_bare) { + Blt(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bgtz(XRegister rs, Riscv64Label* label, bool is_bare) { + Bgt(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondEQ, rs, rt); +} + +void Riscv64Assembler::Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondNE, rs, rt); +} + +void Riscv64Assembler::Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLE, rs, rt); +} + +void Riscv64Assembler::Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGE, rs, rt); +} + +void Riscv64Assembler::Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLT, rs, rt); +} + +void Riscv64Assembler::Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGT, rs, rt); +} + +void Riscv64Assembler::Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLEU, rs, rt); +} + +void Riscv64Assembler::Bgeu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGEU, rs, rt); +} + +void Riscv64Assembler::Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLTU, rs, rt); +} + +void Riscv64Assembler::Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGTU, rs, rt); +} + +void Riscv64Assembler::Jal(XRegister rd, Riscv64Label* label, bool is_bare) { + Buncond(label, rd, is_bare); +} + +void Riscv64Assembler::J(Riscv64Label* label, bool is_bare) { + Jal(Zero, label, is_bare); +} + +void Riscv64Assembler::Jal(Riscv64Label* label, bool is_bare) { + Jal(RA, label, is_bare); +} + +void Riscv64Assembler::Loadw(XRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + LoadLiteral(literal, rd, Branch::kLiteral); +} + +void Riscv64Assembler::Loadwu(XRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + LoadLiteral(literal, rd, Branch::kLiteralUnsigned); +} + +void Riscv64Assembler::Loadd(XRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 8u); + LoadLiteral(literal, rd, Branch::kLiteralLong); +} + +void Riscv64Assembler::FLoadw(FRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + LoadLiteral(literal, rd, Branch::kLiteralFloat); +} + +void Riscv64Assembler::FLoadd(FRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 8u); + LoadLiteral(literal, rd, Branch::kLiteralDouble); +} + +void Riscv64Assembler::Unimp() { + // TODO(riscv64): use 16-bit zero C.UNIMP once we support compression + Emit(0xC0001073); +} + +/////////////////////////////// RV64 MACRO Instructions END /////////////////////////////// + +const 
Riscv64Assembler::Branch::BranchInfo Riscv64Assembler::Branch::branch_info_[] = { + // Short branches (can be promoted to longer). + {4, 0, Riscv64Assembler::Branch::kOffset13}, // kCondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kUncondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kCall + // Short branches (can't be promoted to longer). + {4, 0, Riscv64Assembler::Branch::kOffset13}, // kBareCondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kBareUncondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kBareCall + + // Medium branch. + {8, 4, Riscv64Assembler::Branch::kOffset21}, // kCondBranch21 + + // Long branches. + {12, 4, Riscv64Assembler::Branch::kOffset32}, // kLongCondBranch + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLongUncondBranch + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLongCall + + // label. + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLabel + + // literals. + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteral + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralUnsigned + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralLong + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralFloat + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralDouble +}; + +void Riscv64Assembler::Branch::InitShortOrLong(Riscv64Assembler::Branch::OffsetBits offset_size, + Riscv64Assembler::Branch::Type short_type, + Riscv64Assembler::Branch::Type long_type, + Riscv64Assembler::Branch::Type longest_type) { + Riscv64Assembler::Branch::Type type = short_type; + if (offset_size > branch_info_[type].offset_size) { + type = long_type; + if (offset_size > branch_info_[type].offset_size) { + type = longest_type; + } + } + type_ = type; +} + +void Riscv64Assembler::Branch::InitializeType(Type initial_type) { + OffsetBits offset_size_needed = GetOffsetSizeNeeded(location_, target_); + + switch (initial_type) { + case kCondBranch: + if (condition_ != kUncond) { + InitShortOrLong(offset_size_needed, kCondBranch, kCondBranch21, kLongCondBranch); + break; + } + FALLTHROUGH_INTENDED; + case kUncondBranch: + InitShortOrLong(offset_size_needed, kUncondBranch, kLongUncondBranch, kLongUncondBranch); + break; + case kCall: + InitShortOrLong(offset_size_needed, kCall, kLongCall, kLongCall); + break; + case kBareCondBranch: + if (condition_ != kUncond) { + type_ = kBareCondBranch; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + } + FALLTHROUGH_INTENDED; + case kBareUncondBranch: + type_ = kBareUncondBranch; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + case kBareCall: + type_ = kBareCall; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + case kLabel: + type_ = initial_type; + break; + case kLiteral: + case kLiteralUnsigned: + case kLiteralLong: + case kLiteralFloat: + case kLiteralDouble: + CHECK(!IsResolved()); + type_ = initial_type; + break; + default: + LOG(FATAL) << "Unexpected branch type " << enum_cast<uint32_t>(initial_type); + UNREACHABLE(); + } + + old_type_ = type_; +} + +bool Riscv64Assembler::Branch::IsNop(BranchCondition condition, XRegister lhs, XRegister rhs) { + switch (condition) { + case kCondNE: + case kCondLT: + case kCondGT: + case kCondLTU: + case kCondGTU: + return lhs == rhs; + default: + return false; + } +} + +bool Riscv64Assembler::Branch::IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs) { + switch (condition) { + case kUncond: + return true; + case kCondEQ: + case kCondGE: + case kCondLE: + case kCondLEU: + case kCondGEU: + return lhs == rhs; + 
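+ // (EQ, GE, LE, GEU and LEU hold when comparing a register with itself, so the
+ // branch is unconditional; NE, LT, GT, LTU and GTU can then never be taken.)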
default:
+ return false;
+ }
+}
+
+Riscv64Assembler::Branch::Branch(uint32_t location, uint32_t target, XRegister rd, bool is_bare)
+ : old_location_(location),
+ location_(location),
+ target_(target),
+ lhs_reg_(rd),
+ rhs_reg_(Zero),
+ freg_(kNoFRegister),
+ condition_(kUncond) {
+ InitializeType(
+ (rd != Zero ? (is_bare ? kBareCall : kCall) : (is_bare ? kBareUncondBranch : kUncondBranch)));
+}
+
+Riscv64Assembler::Branch::Branch(uint32_t location,
+ uint32_t target,
+ Riscv64Assembler::BranchCondition condition,
+ XRegister lhs_reg,
+ XRegister rhs_reg,
+ bool is_bare)
+ : old_location_(location),
+ location_(location),
+ target_(target),
+ lhs_reg_(lhs_reg),
+ rhs_reg_(rhs_reg),
+ freg_(kNoFRegister),
+ condition_(condition) {
+ DCHECK_NE(condition, kUncond);
+ DCHECK(!IsNop(condition, lhs_reg, rhs_reg));
+ DCHECK(!IsUncond(condition, lhs_reg, rhs_reg));
+ InitializeType(is_bare ? kBareCondBranch : kCondBranch);
+}
+
+Riscv64Assembler::Branch::Branch(uint32_t location,
+ uint32_t target,
+ XRegister rd,
+ Type label_or_literal_type)
+ : old_location_(location),
+ location_(location),
+ target_(target),
+ lhs_reg_(rd),
+ rhs_reg_(Zero),
+ freg_(kNoFRegister),
+ condition_(kUncond) {
+ CHECK_NE(rd, Zero);
+ InitializeType(label_or_literal_type);
+}
+
+Riscv64Assembler::Branch::Branch(uint32_t location,
+ uint32_t target,
+ FRegister rd,
+ Type literal_type)
+ : old_location_(location),
+ location_(location),
+ target_(target),
+ lhs_reg_(Zero),
+ rhs_reg_(Zero),
+ freg_(rd),
+ condition_(kUncond) {
+ InitializeType(literal_type);
+}
+
+Riscv64Assembler::BranchCondition Riscv64Assembler::Branch::OppositeCondition(
+ Riscv64Assembler::BranchCondition cond) {
+ switch (cond) {
+ case kCondEQ:
+ return kCondNE;
+ case kCondNE:
+ return kCondEQ;
+ case kCondLT:
+ return kCondGE;
+ case kCondGE:
+ return kCondLT;
+ case kCondLE:
+ return kCondGT;
+ case kCondGT:
+ return kCondLE;
+ case kCondLTU:
+ return kCondGEU;
+ case kCondGEU:
+ return kCondLTU;
+ case kCondLEU:
+ return kCondGTU;
+ case kCondGTU:
+ return kCondLEU;
+ case kUncond:
+ LOG(FATAL) << "Unexpected branch condition " << enum_cast<uint32_t>(cond);
+ UNREACHABLE();
+ }
+}
+
+Riscv64Assembler::Branch::Type Riscv64Assembler::Branch::GetType() const { return type_; }
+
+Riscv64Assembler::BranchCondition Riscv64Assembler::Branch::GetCondition() const {
+ return condition_;
+}
+
+XRegister Riscv64Assembler::Branch::GetLeftRegister() const { return lhs_reg_; }
+
+XRegister Riscv64Assembler::Branch::GetRightRegister() const { return rhs_reg_; }
+
+FRegister Riscv64Assembler::Branch::GetFRegister() const { return freg_; }
+
+uint32_t Riscv64Assembler::Branch::GetTarget() const { return target_; }
+
+uint32_t Riscv64Assembler::Branch::GetLocation() const { return location_; }
+
+uint32_t Riscv64Assembler::Branch::GetOldLocation() const { return old_location_; }
+
+uint32_t Riscv64Assembler::Branch::GetLength() const { return branch_info_[type_].length; }
+
+uint32_t Riscv64Assembler::Branch::GetOldLength() const { return branch_info_[old_type_].length; }
+
+uint32_t Riscv64Assembler::Branch::GetEndLocation() const { return GetLocation() + GetLength(); }
+
+uint32_t Riscv64Assembler::Branch::GetOldEndLocation() const {
+ return GetOldLocation() + GetOldLength();
+}
+
+bool Riscv64Assembler::Branch::IsBare() const {
+ switch (type_) {
+ case kBareUncondBranch:
+ case kBareCondBranch:
+ case kBareCall:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool Riscv64Assembler::Branch::IsResolved() const { return target_ != kUnresolved; }
+
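+// The three `OffsetBits` sizes correspond to the sequences emitted by
+// `EmitBranch()` below:
+//   kOffset13: Bcond rs1, rs2, imm13 (conditional branch, ±4 KiB);
+//   kOffset21: JAL rd, imm21 (±1 MiB);
+//   kOffset32: AUIPC tmp, imm20 followed by JALR or a load with imm12 (±2 GiB).
+// A conditional branch promoted past ±4 KiB becomes an opposite-condition skip
+// over a JAL (kCondBranch21); past ±1 MiB, a skip over AUIPC+JALR
+// (kLongCondBranch).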
+Riscv64Assembler::Branch::OffsetBits Riscv64Assembler::Branch::GetOffsetSize() const { + return branch_info_[type_].offset_size; +} + +Riscv64Assembler::Branch::OffsetBits Riscv64Assembler::Branch::GetOffsetSizeNeeded( + uint32_t location, uint32_t target) { + // For unresolved targets assume the shortest encoding + // (later it will be made longer if needed). + if (target == kUnresolved) { + return kOffset13; + } + int64_t distance = static_cast<int64_t>(target) - location; + if (IsInt<kOffset13>(distance)) { + return kOffset13; + } else if (IsInt<kOffset21>(distance)) { + return kOffset21; + } else { + return kOffset32; + } +} + +void Riscv64Assembler::Branch::Resolve(uint32_t target) { target_ = target; } + +void Riscv64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) { + // All targets should be resolved before we start promoting branches. + DCHECK(IsResolved()); + if (location_ > expand_location) { + location_ += delta; + } + if (target_ > expand_location) { + target_ += delta; + } +} + +uint32_t Riscv64Assembler::Branch::PromoteIfNeeded() { + // All targets should be resolved before we start promoting branches. + DCHECK(IsResolved()); + Type old_type = type_; + switch (type_) { + // Short branches (can be promoted to longer). + case kCondBranch: { + OffsetBits needed_size = GetOffsetSizeNeeded(GetOffsetLocation(), target_); + if (needed_size <= GetOffsetSize()) { + return 0u; + } + // The offset remains the same for `kCondBranch21` for forward branches. + DCHECK_EQ(branch_info_[kCondBranch21].length - branch_info_[kCondBranch21].pc_offset, + branch_info_[kCondBranch].length - branch_info_[kCondBranch].pc_offset); + if (target_ <= location_) { + // Calculate the needed size for kCondBranch21. + needed_size = + GetOffsetSizeNeeded(location_ + branch_info_[kCondBranch21].pc_offset, target_); + } + type_ = (needed_size <= branch_info_[kCondBranch21].offset_size) + ? kCondBranch21 + : kLongCondBranch; + break; + } + case kUncondBranch: + if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) { + return 0u; + } + type_ = kLongUncondBranch; + break; + case kCall: + if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) { + return 0u; + } + type_ = kLongCall; + break; + // Medium branch (can be promoted to long). + case kCondBranch21: + if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) { + return 0u; + } + type_ = kLongCondBranch; + break; + default: + // Other branch types cannot be promoted. + DCHECK_LE(GetOffsetSizeNeeded(GetOffsetLocation(), target_), GetOffsetSize()) << type_; + return 0u; + } + DCHECK(type_ != old_type); + DCHECK_GT(branch_info_[type_].length, branch_info_[old_type].length); + return branch_info_[type_].length - branch_info_[old_type].length; +} + +uint32_t Riscv64Assembler::Branch::GetOffsetLocation() const { + return location_ + branch_info_[type_].pc_offset; +} + +int32_t Riscv64Assembler::Branch::GetOffset() const { + CHECK(IsResolved()); + // Calculate the byte distance between instructions and also account for + // different PC-relative origins. 
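+ // (The PC-relative instruction is not always the first in the sequence; for
+ // kCondBranch21 and kLongCondBranch it follows the opposite-condition skip,
+ // which is what `pc_offset` in `branch_info_` records.)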
+ uint32_t offset_location = GetOffsetLocation(); + int32_t offset = static_cast<int32_t>(target_ - offset_location); + DCHECK_EQ(offset, static_cast<int64_t>(target_) - static_cast<int64_t>(offset_location)); + return offset; +} + +void Riscv64Assembler::EmitBcond(BranchCondition cond, + XRegister rs, + XRegister rt, + int32_t offset) { + switch (cond) { +#define DEFINE_CASE(COND, cond) \ + case kCond##COND: \ + B##cond(rs, rt, offset); \ + break; + DEFINE_CASE(EQ, eq) + DEFINE_CASE(NE, ne) + DEFINE_CASE(LT, lt) + DEFINE_CASE(GE, ge) + DEFINE_CASE(LE, le) + DEFINE_CASE(GT, gt) + DEFINE_CASE(LTU, ltu) + DEFINE_CASE(GEU, geu) + DEFINE_CASE(LEU, leu) + DEFINE_CASE(GTU, gtu) +#undef DEFINE_CASE + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << enum_cast<uint32_t>(cond); + UNREACHABLE(); + } +} + +void Riscv64Assembler::EmitBranch(Riscv64Assembler::Branch* branch) { + CHECK(overwriting_); + overwrite_location_ = branch->GetLocation(); + const int32_t offset = branch->GetOffset(); + BranchCondition condition = branch->GetCondition(); + XRegister lhs = branch->GetLeftRegister(); + XRegister rhs = branch->GetRightRegister(); + + auto emit_auipc_and_next = [&](XRegister reg, auto next) { + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + auto [imm20, short_offset] = SplitOffset(offset); + Auipc(reg, imm20); + next(short_offset); + }; + + switch (branch->GetType()) { + // Short branches. + case Branch::kUncondBranch: + case Branch::kBareUncondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + J(offset); + break; + case Branch::kCondBranch: + case Branch::kBareCondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcond(condition, lhs, rhs, offset); + break; + case Branch::kCall: + case Branch::kBareCall: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + DCHECK(lhs != Zero); + Jal(lhs, offset); + break; + + // Medium branch. + case Branch::kCondBranch21: + EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, branch->GetLength()); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + J(offset); + break; + + // Long branches. + case Branch::kLongCondBranch: + EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, branch->GetLength()); + FALLTHROUGH_INTENDED; + case Branch::kLongUncondBranch: + emit_auipc_and_next(TMP, [&](int32_t short_offset) { Jalr(Zero, TMP, short_offset); }); + break; + case Branch::kLongCall: + DCHECK(lhs != Zero); + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Jalr(lhs, lhs, short_offset); }); + break; + + // label. + case Branch::kLabel: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Addi(lhs, lhs, short_offset); }); + break; + // literals. 
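+ // Literals are loaded PC-relative as well: AUIPC forms the upper 20 bits of
+ // the distance to the literal pool entry and the following load consumes the
+ // low 12 bits; FP literals go through TMP since AUIPC needs an integer
+ // register.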
+ case Branch::kLiteral: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Lw(lhs, lhs, short_offset); }); + break; + case Branch::kLiteralUnsigned: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Lwu(lhs, lhs, short_offset); }); + break; + case Branch::kLiteralLong: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Ld(lhs, lhs, short_offset); }); + break; + case Branch::kLiteralFloat: + emit_auipc_and_next( + TMP, [&](int32_t short_offset) { FLw(branch->GetFRegister(), TMP, short_offset); }); + break; + case Branch::kLiteralDouble: + emit_auipc_and_next( + TMP, [&](int32_t short_offset) { FLd(branch->GetFRegister(), TMP, short_offset); }); + break; + } + CHECK_EQ(overwrite_location_, branch->GetEndLocation()); + CHECK_LE(branch->GetLength(), static_cast<uint32_t>(Branch::kMaxBranchLength)); +} + +void Riscv64Assembler::EmitBranches() { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (branch placeholders) in the buffer. + overwriting_ = true; + for (auto& branch : branches_) { + EmitBranch(&branch); + } + overwriting_ = false; +} + +void Riscv64Assembler::FinalizeLabeledBranch(Riscv64Label* label) { + // TODO(riscv64): Support "C" Standard Extension - length may not be a multiple of 4. + DCHECK_ALIGNED(branches_.back().GetLength(), sizeof(uint32_t)); + uint32_t length = branches_.back().GetLength() / sizeof(uint32_t); + if (!label->IsBound()) { + // Branch forward (to a following label), distance is unknown. + // The first branch forward will contain 0, serving as the terminator of + // the list of forward-reaching branches. + Emit(label->position_); + length--; + // Now make the label object point to this branch + // (this forms a linked list of branches preceding this label). + uint32_t branch_id = branches_.size() - 1; + label->LinkTo(branch_id); + } + // Reserve space for the branch. + for (; length != 0u; --length) { + Nop(); + } +} + +void Riscv64Assembler::Bcond( + Riscv64Label* label, bool is_bare, BranchCondition condition, XRegister lhs, XRegister rhs) { + // TODO(riscv64): Should an assembler perform these optimizations, or should we remove them? + // If lhs = rhs, this can be a NOP. + if (Branch::IsNop(condition, lhs, rhs)) { + return; + } + if (Branch::IsUncond(condition, lhs, rhs)) { + Buncond(label, Zero, is_bare); + return; + } + + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs, is_bare); + FinalizeLabeledBranch(label); +} + +void Riscv64Assembler::Buncond(Riscv64Label* label, XRegister rd, bool is_bare) { + uint32_t target = label->IsBound() ? 
GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, rd, is_bare); + FinalizeLabeledBranch(label); +} + +template <typename XRegisterOrFRegister> +void Riscv64Assembler::LoadLiteral(Literal* literal, + XRegisterOrFRegister rd, + Branch::Type literal_type) { + Riscv64Label* label = literal->GetLabel(); + DCHECK(!label->IsBound()); + branches_.emplace_back(buffer_.Size(), Branch::kUnresolved, rd, literal_type); + FinalizeLabeledBranch(label); +} + +Riscv64Assembler::Branch* Riscv64Assembler::GetBranch(uint32_t branch_id) { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +const Riscv64Assembler::Branch* Riscv64Assembler::GetBranch(uint32_t branch_id) const { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +void Riscv64Assembler::Bind(Riscv64Label* label) { + CHECK(!label->IsBound()); + uint32_t bound_pc = buffer_.Size(); + + // Walk the list of branches referring to and preceding this label. + // Store the previously unknown target addresses in them. + while (label->IsLinked()) { + uint32_t branch_id = label->Position(); + Branch* branch = GetBranch(branch_id); + branch->Resolve(bound_pc); + + uint32_t branch_location = branch->GetLocation(); + // Extract the location of the previous branch in the list (walking the list backwards; + // the previous branch ID was stored in the space reserved for this branch). + uint32_t prev = buffer_.Load<uint32_t>(branch_location); + + // On to the previous branch in the list... + label->position_ = prev; + } + + // Now make the label object contain its own location (relative to the end of the preceding + // branch, if any; it will be used by the branches referring to and following this label). + uint32_t prev_branch_id = Riscv64Label::kNoPrevBranchId; + if (!branches_.empty()) { + prev_branch_id = branches_.size() - 1u; + const Branch* prev_branch = GetBranch(prev_branch_id); + bound_pc -= prev_branch->GetEndLocation(); + } + label->prev_branch_id_ = prev_branch_id; + label->BindTo(bound_pc); +} + +void Riscv64Assembler::LoadLabelAddress(XRegister rd, Riscv64Label* label) { + DCHECK_NE(rd, Zero); + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, rd, Branch::kLabel); + FinalizeLabeledBranch(label); +} + +Literal* Riscv64Assembler::NewLiteral(size_t size, const uint8_t* data) { + // We don't support byte and half-word literals. + if (size == 4u) { + literals_.emplace_back(size, data); + return &literals_.back(); + } else { + DCHECK_EQ(size, 8u); + long_literals_.emplace_back(size, data); + return &long_literals_.back(); + } +} + +JumpTable* Riscv64Assembler::CreateJumpTable(ArenaVector<Riscv64Label*>&& labels) { + jump_tables_.emplace_back(std::move(labels)); + JumpTable* table = &jump_tables_.back(); + DCHECK(!table->GetLabel()->IsBound()); + return table; +} + +uint32_t Riscv64Assembler::GetLabelLocation(const Riscv64Label* label) const { + CHECK(label->IsBound()); + uint32_t target = label->Position(); + if (label->prev_branch_id_ != Riscv64Label::kNoPrevBranchId) { + // Get label location based on the branch preceding it. + const Branch* prev_branch = GetBranch(label->prev_branch_id_); + target += prev_branch->GetEndLocation(); + } + return target; +} + +uint32_t Riscv64Assembler::GetAdjustedPosition(uint32_t old_position) { + // We can reconstruct the adjustment by going through all the branches from the beginning + // up to the `old_position`. 
Since we expect `GetAdjustedPosition()` to be called in a loop + // with increasing `old_position`, we can use the data from last `GetAdjustedPosition()` to + // continue where we left off and the whole loop should be O(m+n) where m is the number + // of positions to adjust and n is the number of branches. + if (old_position < last_old_position_) { + last_position_adjustment_ = 0; + last_old_position_ = 0; + last_branch_id_ = 0; + } + while (last_branch_id_ != branches_.size()) { + const Branch* branch = GetBranch(last_branch_id_); + if (branch->GetLocation() >= old_position + last_position_adjustment_) { + break; + } + last_position_adjustment_ += branch->GetLength() - branch->GetOldLength(); + ++last_branch_id_; + } + last_old_position_ = old_position; + return old_position + last_position_adjustment_; +} + +void Riscv64Assembler::ReserveJumpTableSpace() { + if (!jump_tables_.empty()) { + for (JumpTable& table : jump_tables_) { + Riscv64Label* label = table.GetLabel(); + Bind(label); + + // Bulk ensure capacity, as this may be large. + size_t orig_size = buffer_.Size(); + size_t required_capacity = orig_size + table.GetSize(); + if (required_capacity > buffer_.Capacity()) { + buffer_.ExtendCapacity(required_capacity); + } +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = true; +#endif + + // Fill the space with placeholder data as the data is not final + // until the branches have been promoted. And we shouldn't + // be moving uninitialized data during branch promotion. + for (size_t cnt = table.GetData().size(), i = 0; i < cnt; ++i) { + buffer_.Emit<uint32_t>(0x1abe1234u); + } + +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = false; +#endif + } + } +} + +void Riscv64Assembler::PromoteBranches() { + // Promote short branches to long as necessary. + bool changed; + do { + changed = false; + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + uint32_t delta = branch.PromoteIfNeeded(); + // If this branch has been promoted and needs to expand in size, + // relocate all branches by the expansion size. + if (delta != 0u) { + changed = true; + uint32_t expand_location = branch.GetLocation(); + for (auto& branch2 : branches_) { + branch2.Relocate(expand_location, delta); + } + } + } + } while (changed); + + // Account for branch expansion by resizing the code buffer + // and moving the code in it to its final location. + size_t branch_count = branches_.size(); + if (branch_count > 0) { + // Resize. + Branch& last_branch = branches_[branch_count - 1]; + uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation(); + uint32_t old_size = buffer_.Size(); + buffer_.Resize(old_size + size_delta); + // Move the code residing between branch placeholders. + uint32_t end = old_size; + for (size_t i = branch_count; i > 0;) { + Branch& branch = branches_[--i]; + uint32_t size = end - branch.GetOldEndLocation(); + buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size); + end = branch.GetOldLocation(); + } + } + + // Align 64-bit literals by moving them up by 4 bytes if needed. + // This can increase the PC-relative distance but all literals are accessed with AUIPC+Load(imm12) + // without branch promotion, so this late adjustment cannot take them out of instruction range. + if (!long_literals_.empty()) { + uint32_t first_literal_location = GetLabelLocation(long_literals_.front().GetLabel()); + size_t lit_size = long_literals_.size() * sizeof(uint64_t); + size_t buf_size = buffer_.Size(); + // 64-bit literals must be at the very end of the buffer. 
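+    // Illustrative example (assumed sizes): if the code ends at 0x1004 and is followed by one
+    // 64-bit literal, that literal sits misaligned at 0x1004; the code below inserts a 4-byte
+    // pad (encoded as an illegal instruction) so the literal moves to the aligned address 0x1008.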
+    CHECK_EQ(first_literal_location + lit_size, buf_size);
+    if (!IsAligned<sizeof(uint64_t)>(first_literal_location)) {
+      // Insert the padding.
+      buffer_.Resize(buf_size + sizeof(uint32_t));
+      buffer_.Move(first_literal_location + sizeof(uint32_t), first_literal_location, lit_size);
+      DCHECK(!overwriting_);
+      overwriting_ = true;
+      overwrite_location_ = first_literal_location;
+      Emit(0);  // Illegal instruction.
+      overwriting_ = false;
+      // Increase target addresses in literal and address loads by 4 bytes in order for correct
+      // offsets from PC to be generated.
+      for (auto& branch : branches_) {
+        uint32_t target = branch.GetTarget();
+        if (target >= first_literal_location) {
+          branch.Resolve(target + sizeof(uint32_t));
+        }
+      }
+      // If after this we ever call GetLabelLocation() to get the location of a 64-bit literal,
+      // we need to adjust the location of the literal's label as well.
+      for (Literal& literal : long_literals_) {
+        // Bound label's position is negative, hence decrementing it instead of incrementing.
+        literal.GetLabel()->position_ -= sizeof(uint32_t);
+      }
+    }
+  }
+}
+
+void Riscv64Assembler::PatchCFI() {
+  if (cfi().NumberOfDelayedAdvancePCs() == 0u) {
+    return;
+  }
+
+  using DelayedAdvancePC = DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC;
+  const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();
+  const std::vector<uint8_t>& old_stream = data.first;
+  const std::vector<DelayedAdvancePC>& advances = data.second;
+
+  // Refill our data buffer with patched opcodes.
+  static constexpr size_t kExtraSpace = 16;  // Not every PC advance can be encoded in one byte.
+  cfi().ReserveCFIStream(old_stream.size() + advances.size() + kExtraSpace);
+  size_t stream_pos = 0;
+  for (const DelayedAdvancePC& advance : advances) {
+    DCHECK_GE(advance.stream_pos, stream_pos);
+    // Copy old data up to the point where advance was issued.
+    cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos);
+    stream_pos = advance.stream_pos;
+    // Insert the advance command with its final offset.
+    size_t final_pc = GetAdjustedPosition(advance.pc);
+    cfi().AdvancePC(final_pc);
+  }
+  // Copy the final segment if any.
+  cfi().AppendRawData(old_stream, stream_pos, old_stream.size());
+}
+
+void Riscv64Assembler::EmitJumpTables() {
+  if (!jump_tables_.empty()) {
+    CHECK(!overwriting_);
+    // Switch from appending instructions at the end of the buffer to overwriting
+    // existing instructions (here, jump tables) in the buffer.
+    overwriting_ = true;
+
+    for (JumpTable& table : jump_tables_) {
+      Riscv64Label* table_label = table.GetLabel();
+      uint32_t start = GetLabelLocation(table_label);
+      overwrite_location_ = start;
+
+      for (Riscv64Label* target : table.GetData()) {
+        CHECK_EQ(buffer_.Load<uint32_t>(overwrite_location_), 0x1abe1234u);
+        // The table will contain target addresses relative to the table start.
+        uint32_t offset = GetLabelLocation(target) - start;
+        Emit(offset);
+      }
+    }
+
+    overwriting_ = false;
+  }
+}
+
+void Riscv64Assembler::EmitLiterals() {
+  if (!literals_.empty()) {
+    for (Literal& literal : literals_) {
+      Riscv64Label* label = literal.GetLabel();
+      Bind(label);
+      AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+      DCHECK_EQ(literal.GetSize(), 4u);
+      for (size_t i = 0, size = literal.GetSize(); i != size; ++i) {
+        buffer_.Emit<uint8_t>(literal.GetData()[i]);
+      }
+    }
+  }
+  if (!long_literals_.empty()) {
+    // These need to be 8-byte-aligned but we shall add the alignment padding after the branch
+    // promotion, if needed. Since all literals are accessed with AUIPC+Load(imm12) without branch
+    // promotion, this late adjustment cannot take long literals out of instruction range.
+    for (Literal& literal : long_literals_) {
+      Riscv64Label* label = literal.GetLabel();
+      Bind(label);
+      AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+      DCHECK_EQ(literal.GetSize(), 8u);
+      for (size_t i = 0, size = literal.GetSize(); i != size; ++i) {
+        buffer_.Emit<uint8_t>(literal.GetData()[i]);
+      }
+    }
+  }
+}
+
+// This method is used to adjust the base register and offset pair for
+// a load/store when the offset doesn't fit into a 12-bit signed integer.
+void Riscv64Assembler::AdjustBaseAndOffset(XRegister& base,
+                                           int32_t& offset,
+                                           ScratchRegisterScope& srs) {
+  // A scratch register must be available for adjustment even if it's not needed.
+  CHECK_NE(srs.AvailableXRegisters(), 0u);
+  if (IsInt<12>(offset)) {
+    return;
+  }
+
+  constexpr int32_t kPositiveOffsetMaxSimpleAdjustment = 0x7ff;
+  constexpr int32_t kHighestOffsetForSimpleAdjustment = 2 * kPositiveOffsetMaxSimpleAdjustment;
+  constexpr int32_t kPositiveOffsetSimpleAdjustmentAligned8 =
+      RoundDown(kPositiveOffsetMaxSimpleAdjustment, 8);
+  constexpr int32_t kPositiveOffsetSimpleAdjustmentAligned4 =
+      RoundDown(kPositiveOffsetMaxSimpleAdjustment, 4);
+  constexpr int32_t kNegativeOffsetSimpleAdjustment = -0x800;
+  constexpr int32_t kLowestOffsetForSimpleAdjustment = 2 * kNegativeOffsetSimpleAdjustment;
+
+  XRegister tmp = srs.AllocateXRegister();
+  if (offset >= 0 && offset <= kHighestOffsetForSimpleAdjustment) {
+    // Make the adjustment 8-byte aligned (0x7f8) except for offsets that cannot be reached
+    // with this adjustment, then try 4-byte alignment, then just half of the offset.
+    int32_t adjustment = IsInt<12>(offset - kPositiveOffsetSimpleAdjustmentAligned8)
+        ? kPositiveOffsetSimpleAdjustmentAligned8
+        : IsInt<12>(offset - kPositiveOffsetSimpleAdjustmentAligned4)
+            ? kPositiveOffsetSimpleAdjustmentAligned4
+            : offset / 2;
+    DCHECK(IsInt<12>(adjustment));
+    Addi(tmp, base, adjustment);
+    offset -= adjustment;
+  } else if (offset < 0 && offset >= kLowestOffsetForSimpleAdjustment) {
+    Addi(tmp, base, kNegativeOffsetSimpleAdjustment);
+    offset -= kNegativeOffsetSimpleAdjustment;
+  } else if (offset >= 0x7ffff800) {
+    // Support even large offsets outside the range supported by `SplitOffset()`.
+    LoadConst32(tmp, offset);
+    Add(tmp, tmp, base);
+    offset = 0;
+  } else {
+    auto [imm20, short_offset] = SplitOffset(offset);
+    Lui(tmp, imm20);
+    Add(tmp, tmp, base);
+    offset = short_offset;
+  }
+  base = tmp;
+}
+
+template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)>
+void Riscv64Assembler::LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset) {
+  CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u);
+  CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u);
+  ScratchRegisterScope srs(this);
+  // If `rd` differs from `rs1`, allow using it as a temporary if needed.
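+  // Illustrative expansion (a sketch; the temporary picked depends on the scratch allocator):
+  // with `insn = &Riscv64Assembler::Lw`, a load at offset 0xabc can become
+  //   addi tmp, rs1, 0x7f8
+  //   lw   rd, 0x2c4(tmp)
+  // where `tmp` may be `rd` itself thanks to the `IncludeXRegister()` below.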
+ if (rd != rs1) { + srs.IncludeXRegister(rd); + } + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rd, rs1, offset); +} + +template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> +void Riscv64Assembler::StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + CHECK_EQ((1u << rs2) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rs2, rs1, offset); +} + +template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> +void Riscv64Assembler::FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rd, rs1, offset); +} + +template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> +void Riscv64Assembler::FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rs2, rs1, offset); +} + +void Riscv64Assembler::LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp) { + CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + CHECK_IMPLIES(can_use_tmp, srs.AvailableXRegisters() != 0u); + + // Helper lambdas. + auto addi = [&](XRegister rd, XRegister rs, int32_t imm) { Addi(rd, rs, imm); }; + auto addiw = [&](XRegister rd, XRegister rs, int32_t imm) { Addiw(rd, rs, imm); }; + auto slli = [&](XRegister rd, XRegister rs, int32_t imm) { Slli(rd, rs, imm); }; + auto lui = [&](XRegister rd, uint32_t imm20) { Lui(rd, imm20); }; + + // Simple LUI+ADDI/W can handle value range [-0x80000800, 0x7fffffff]. + auto is_simple_li_value = [](int64_t value) { + return value >= INT64_C(-0x80000800) && value <= INT64_C(0x7fffffff); + }; + auto emit_simple_li_helper = [&](XRegister rd, + int64_t value, + auto&& addi, + auto&& addiw, + auto&& slli, + auto&& lui) { + DCHECK(is_simple_li_value(value)) << "0x" << std::hex << value; + if (IsInt<12>(value)) { + addi(rd, Zero, value); + } else if (CTZ(value) < 12 && IsInt(6 + CTZ(value), value)) { + // This path yields two 16-bit instructions with the "C" Standard Extension. + addi(rd, Zero, value >> CTZ(value)); + slli(rd, rd, CTZ(value)); + } else if (value < INT64_C(-0x80000000)) { + int32_t small_value = dchecked_integral_cast<int32_t>(value - INT64_C(-0x80000000)); + DCHECK(IsInt<12>(small_value)); + DCHECK_LT(small_value, 0); + lui(rd, 1u << 19); + addi(rd, rd, small_value); + } else { + DCHECK(IsInt<32>(value)); + // Note: Similar to `SplitOffset()` but we can target the full 32-bit range with ADDIW. + int64_t near_value = (value + 0x800) & ~0xfff; + int32_t small_value = value - near_value; + DCHECK(IsInt<12>(small_value)); + uint32_t imm20 = static_cast<uint32_t>(near_value) >> 12; + DCHECK_NE(imm20, 0u); // Small values are handled above. 
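+      // E.g. value = 0x12345678: near_value = 0x12345000 and small_value = 0x678,
+      // yielding LUI rd, 0x12345 followed by ADDIW rd, rd, 0x678.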
+ lui(rd, imm20); + if (small_value != 0) { + addiw(rd, rd, small_value); + } + } + }; + auto emit_simple_li = [&](XRegister rd, int64_t value) { + emit_simple_li_helper(rd, value, addi, addiw, slli, lui); + }; + auto count_simple_li_instructions = [&](int64_t value) { + size_t num_instructions = 0u; + auto count_rri = [&](XRegister, XRegister, int32_t) { ++num_instructions; }; + auto count_ru = [&](XRegister, uint32_t) { ++num_instructions; }; + emit_simple_li_helper(Zero, value, count_rri, count_rri, count_rri, count_ru); + return num_instructions; + }; + + // If LUI+ADDI/W is not enough, we can generate up to 3 SLLI+ADDI afterwards (up to 8 instructions + // total). The ADDI from the first SLLI+ADDI pair can be a no-op. + auto emit_with_slli_addi_helper = [&](XRegister rd, + int64_t value, + auto&& addi, + auto&& addiw, + auto&& slli, + auto&& lui) { + static constexpr size_t kMaxNumSllAddi = 3u; + int32_t addi_values[kMaxNumSllAddi]; + size_t sll_shamts[kMaxNumSllAddi]; + size_t num_sll_addi = 0u; + while (!is_simple_li_value(value)) { + DCHECK_LT(num_sll_addi, kMaxNumSllAddi); + // Prepare sign-extended low 12 bits for ADDI. + int64_t addi_value = (value & 0xfff) - ((value & 0x800) << 1); + DCHECK(IsInt<12>(addi_value)); + int64_t remaining = value - addi_value; + size_t shamt = CTZ(remaining); + DCHECK_GE(shamt, 12u); + addi_values[num_sll_addi] = addi_value; + sll_shamts[num_sll_addi] = shamt; + value = remaining >> shamt; + ++num_sll_addi; + } + if (num_sll_addi != 0u && IsInt<20>(value) && !IsInt<12>(value)) { + // If `sll_shamts[num_sll_addi - 1u]` was only 12, we would have stopped + // the decomposition a step earlier with smaller `num_sll_addi`. + DCHECK_GT(sll_shamts[num_sll_addi - 1u], 12u); + // Emit the signed 20-bit value with LUI and reduce the SLLI shamt by 12 to compensate. + sll_shamts[num_sll_addi - 1u] -= 12u; + lui(rd, dchecked_integral_cast<uint32_t>(value & 0xfffff)); + } else { + emit_simple_li_helper(rd, value, addi, addiw, slli, lui); + } + for (size_t i = num_sll_addi; i != 0u; ) { + --i; + slli(rd, rd, sll_shamts[i]); + if (addi_values[i] != 0) { + addi(rd, rd, addi_values[i]); + } + } + }; + auto emit_with_slli_addi = [&](XRegister rd, int64_t value) { + emit_with_slli_addi_helper(rd, value, addi, addiw, slli, lui); + }; + auto count_instructions_with_slli_addi = [&](int64_t value) { + size_t num_instructions = 0u; + auto count_rri = [&](XRegister, XRegister, int32_t) { ++num_instructions; }; + auto count_ru = [&](XRegister, uint32_t) { ++num_instructions; }; + emit_with_slli_addi_helper(Zero, value, count_rri, count_rri, count_rri, count_ru); + return num_instructions; + }; + + size_t insns_needed = count_instructions_with_slli_addi(imm); + size_t trailing_slli_shamt = 0u; + if (insns_needed > 2u) { + // Sometimes it's better to end with a SLLI even when the above code would end with ADDI. + if ((imm & 1) == 0 && (imm & 0xfff) != 0) { + int64_t value = imm >> CTZ(imm); + size_t new_insns_needed = count_instructions_with_slli_addi(value) + /*SLLI*/ 1u; + DCHECK_GT(new_insns_needed, 2u); + if (insns_needed > new_insns_needed) { + insns_needed = new_insns_needed; + trailing_slli_shamt = CTZ(imm); + } + } + + // Sometimes we can emit a shorter sequence that ends with SRLI. + if (imm > 0) { + size_t shamt = CLZ(static_cast<uint64_t>(imm)); + DCHECK_LE(shamt, 32u); // Otherwise we would not get here as `insns_needed` would be <= 2. 
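+      // E.g. imm = 0x00ffffffffffffff gives shamt = 8 and is emitted by the branch below as
+      //   addi rd, zero, -1
+      //   srli rd, rd, 8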
+      if (imm == dchecked_integral_cast<int64_t>(MaxInt<uint64_t>(64 - shamt))) {
+        Addi(rd, Zero, -1);
+        Srli(rd, rd, shamt);
+        return;
+      }
+
+      int64_t value = static_cast<int64_t>(static_cast<uint64_t>(imm) << shamt);
+      DCHECK_LT(value, 0);
+      if (is_simple_li_value(value)) {
+        size_t new_insns_needed = count_simple_li_instructions(value) + /*SRLI*/ 1u;
+        // In case of equal number of instructions, clang prefers the sequence without SRLI.
+        if (new_insns_needed < insns_needed) {
+          // If we emit ADDI, we set low bits that shall be shifted out to one in line with clang,
+          // effectively choosing to emit the negative constant closest to zero.
+          int32_t shifted_out = dchecked_integral_cast<int32_t>(MaxInt<uint32_t>(shamt));
+          DCHECK_EQ(value & shifted_out, 0);
+          emit_simple_li(rd, (value & 0xfff) == 0 ? value : value + shifted_out);
+          Srli(rd, rd, shamt);
+          return;
+        }
+      }
+
+      size_t ctz = CTZ(static_cast<uint64_t>(value));
+      if (IsInt(ctz + 20, value)) {
+        size_t new_insns_needed = /*ADDI or LUI*/ 1u + /*SLLI*/ 1u + /*SRLI*/ 1u;
+        if (new_insns_needed < insns_needed) {
+          // Clang prefers ADDI+SLLI+SRLI over LUI+SLLI+SRLI.
+          if (IsInt(ctz + 12, value)) {
+            Addi(rd, Zero, value >> ctz);
+            Slli(rd, rd, ctz);
+          } else {
+            Lui(rd, (static_cast<uint64_t>(value) >> ctz) & 0xfffffu);
+            Slli(rd, rd, ctz - 12);
+          }
+          Srli(rd, rd, shamt);
+          return;
+        }
+      }
+    }
+
+    // If we can use a scratch register, try using it to emit a shorter sequence. Without a
+    // scratch reg, the sequence is up to 8 instructions, with a scratch reg only up to 6.
+    if (can_use_tmp) {
+      int64_t low = (imm & 0xffffffff) - ((imm & 0x80000000) << 1);
+      int64_t remainder = imm - low;
+      size_t slli_shamt = CTZ(remainder);
+      DCHECK_GE(slli_shamt, 32u);
+      int64_t high = remainder >> slli_shamt;
+      size_t new_insns_needed =
+          ((IsInt<20>(high) || (high & 0xfff) == 0u) ? 1u : 2u) +
+          count_simple_li_instructions(low) +
+          /*SLLI+ADD*/ 2u;
+      if (new_insns_needed < insns_needed) {
+        DCHECK_NE(low & 0xfffff000, 0);
+        XRegister tmp = srs.AllocateXRegister();
+        if (IsInt<20>(high) && !IsInt<12>(high)) {
+          // Emit the signed 20-bit value with LUI and reduce the SLLI shamt by 12 to compensate.
+          Lui(rd, static_cast<uint32_t>(high & 0xfffff));
+          slli_shamt -= 12;
+        } else {
+          emit_simple_li(rd, high);
+        }
+        emit_simple_li(tmp, low);
+        Slli(rd, rd, slli_shamt);
+        Add(rd, rd, tmp);
+        return;
+      }
+    }
+  }
+  emit_with_slli_addi(rd, trailing_slli_shamt != 0u ? imm >> trailing_slli_shamt : imm);
+  if (trailing_slli_shamt != 0u) {
+    Slli(rd, rd, trailing_slli_shamt);
+  }
+}
+
+/////////////////////////////// RV64 VARIANTS extension end ////////////
+
+}  // namespace riscv64
+}  // namespace art
diff --git a/compiler/utils/riscv64/assembler_riscv64.h b/compiler/utils/riscv64/assembler_riscv64.h
new file mode 100644
index 0000000000..0e0a40d52a
--- /dev/null
+++ b/compiler/utils/riscv64/assembler_riscv64.h
@@ -0,0 +1,1181 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
+#define ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
+
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arch/riscv64/instruction_set_features_riscv64.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
+#include "base/globals.h"
+#include "base/macros.h"
+#include "managed_register_riscv64.h"
+#include "utils/assembler.h"
+#include "utils/label.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+class ScratchRegisterScope;
+
+static constexpr size_t kRiscv64HalfwordSize = 2;
+static constexpr size_t kRiscv64WordSize = 4;
+static constexpr size_t kRiscv64DoublewordSize = 8;
+static constexpr size_t kRiscv64FloatRegSizeInBytes = 8;
+
+enum class FPRoundingMode : uint32_t {
+  kRNE = 0x0,  // Round to Nearest, ties to Even
+  kRTZ = 0x1,  // Round towards Zero
+  kRDN = 0x2,  // Round Down (towards −Infinity)
+  kRUP = 0x3,  // Round Up (towards +Infinity)
+  kRMM = 0x4,  // Round to Nearest, ties to Max Magnitude
+  kDYN = 0x7,  // Dynamic rounding mode
+  kDefault = kDYN,
+  // Some instructions never need to round even though the spec includes the RM field.
+  // To simplify testing, emit the RM as 0 by default for these instructions because that's what
+  // `clang` does and because the `llvm-objdump` fails to disassemble the other rounding modes.
+  kIgnored = 0
+};
+
+enum class AqRl : uint32_t {
+  kNone = 0x0,
+  kRelease = 0x1,
+  kAcquire = 0x2,
+  kAqRl = kRelease | kAcquire
+};
+
+// The type used for the FENCE instruction's `pred` and `succ` bit masks.
+enum FenceType {
+  kFenceNone = 0,
+  kFenceWrite = 1,
+  kFenceRead = 2,
+  kFenceOutput = 4,
+  kFenceInput = 8,
+  kFenceDefault = 0xf,
+};
+
+// Used to test the values returned by FClassS/FClassD.
+enum FPClassMaskType {
+  kNegativeInfinity = 0x001,
+  kNegativeNormal = 0x002,
+  kNegativeSubnormal = 0x004,
+  kNegativeZero = 0x008,
+  kPositiveZero = 0x010,
+  kPositiveSubnormal = 0x020,
+  kPositiveNormal = 0x040,
+  kPositiveInfinity = 0x080,
+  kSignalingNaN = 0x100,
+  kQuietNaN = 0x200,
+};
+
+class Riscv64Label : public Label {
+ public:
+  Riscv64Label() : prev_branch_id_(kNoPrevBranchId) {}
+
+  Riscv64Label(Riscv64Label&& src) noexcept
+      // NOLINTNEXTLINE - src.prev_branch_id_ is valid after the move
+      : Label(std::move(src)), prev_branch_id_(src.prev_branch_id_) {}
+
+ private:
+  static constexpr uint32_t kNoPrevBranchId = std::numeric_limits<uint32_t>::max();
+
+  uint32_t prev_branch_id_;  // To get distance from preceding branch, if any.
+
+  friend class Riscv64Assembler;
+  DISALLOW_COPY_AND_ASSIGN(Riscv64Label);
+};
+
+// Assembler literal is a value embedded in code, retrieved using a PC-relative load.
+class Literal {
+ public:
+  static constexpr size_t kMaxSize = 8;
+
+  Literal(uint32_t size, const uint8_t* data) : label_(), size_(size) {
+    DCHECK_LE(size, Literal::kMaxSize);
+    memcpy(data_, data, size);
+  }
+
+  template <typename T>
+  T GetValue() const {
+    DCHECK_EQ(size_, sizeof(T));
+    T value;
+    memcpy(&value, data_, sizeof(T));
+    return value;
+  }
+
+  uint32_t GetSize() const { return size_; }
+
+  const uint8_t* GetData() const { return data_; }
+
+  Riscv64Label* GetLabel() { return &label_; }
+
+  const Riscv64Label* GetLabel() const { return &label_; }
+
+ private:
+  Riscv64Label label_;
+  const uint32_t size_;
+  uint8_t data_[kMaxSize];
+
+  DISALLOW_COPY_AND_ASSIGN(Literal);
+};
+
+// Jump table: table of labels emitted after the code and before the literals. Similar to literals.
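+// A sketch of the intended use (see `CreateJumpTable()` and `LoadLabelAddress()` below):
+// load the table address, load the 32-bit entry for the case index, and add the entry
+// back to the table address to form the jump target, since entries are table-relative.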
+class JumpTable { + public: + explicit JumpTable(ArenaVector<Riscv64Label*>&& labels) : label_(), labels_(std::move(labels)) {} + + size_t GetSize() const { return labels_.size() * sizeof(int32_t); } + + const ArenaVector<Riscv64Label*>& GetData() const { return labels_; } + + Riscv64Label* GetLabel() { return &label_; } + + const Riscv64Label* GetLabel() const { return &label_; } + + private: + Riscv64Label label_; + ArenaVector<Riscv64Label*> labels_; + + DISALLOW_COPY_AND_ASSIGN(JumpTable); +}; + +class Riscv64Assembler final : public Assembler { + public: + explicit Riscv64Assembler(ArenaAllocator* allocator, + const Riscv64InstructionSetFeatures* instruction_set_features = nullptr) + : Assembler(allocator), + branches_(allocator->Adapter(kArenaAllocAssembler)), + finalized_(false), + overwriting_(false), + overwrite_location_(0), + literals_(allocator->Adapter(kArenaAllocAssembler)), + long_literals_(allocator->Adapter(kArenaAllocAssembler)), + jump_tables_(allocator->Adapter(kArenaAllocAssembler)), + last_position_adjustment_(0), + last_old_position_(0), + last_branch_id_(0), + available_scratch_core_registers_((1u << TMP) | (1u << TMP2)), + available_scratch_fp_registers_(1u << FTMP) { + UNUSED(instruction_set_features); + cfi().DelayEmittingAdvancePCs(); + } + + virtual ~Riscv64Assembler() { + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + } + } + + size_t CodeSize() const override { return Assembler::CodeSize(); } + DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); } + + // According to "The RISC-V Instruction Set Manual" + + // LUI/AUIPC (RV32I, with sign-extension on RV64I), opcode = 0x17, 0x37 + // Note: These take a 20-bit unsigned value to align with the clang assembler for testing, + // but the value stored in the register shall actually be sign-extended to 64 bits. 
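+  // For example, `Lui(A0, 0x12345)` leaves A0 = 0x0000000012345000, while
+  // `Lui(A0, 0xfffff)` leaves A0 = 0xfffffffffffff000 after sign extension.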
+  void Lui(XRegister rd, uint32_t imm20);
+  void Auipc(XRegister rd, uint32_t imm20);
+
+  // Jump instructions (RV32I), opcode = 0x67, 0x6f
+  void Jal(XRegister rd, int32_t offset);
+  void Jalr(XRegister rd, XRegister rs1, int32_t offset);
+
+  // Branch instructions (RV32I), opcode = 0x63, funct3 from 0x0 ~ 0x1 and 0x4 ~ 0x7
+  void Beq(XRegister rs1, XRegister rs2, int32_t offset);
+  void Bne(XRegister rs1, XRegister rs2, int32_t offset);
+  void Blt(XRegister rs1, XRegister rs2, int32_t offset);
+  void Bge(XRegister rs1, XRegister rs2, int32_t offset);
+  void Bltu(XRegister rs1, XRegister rs2, int32_t offset);
+  void Bgeu(XRegister rs1, XRegister rs2, int32_t offset);
+
+  // Load instructions (RV32I+RV64I): opcode = 0x03, funct3 from 0x0 ~ 0x6
+  void Lb(XRegister rd, XRegister rs1, int32_t offset);
+  void Lh(XRegister rd, XRegister rs1, int32_t offset);
+  void Lw(XRegister rd, XRegister rs1, int32_t offset);
+  void Ld(XRegister rd, XRegister rs1, int32_t offset);
+  void Lbu(XRegister rd, XRegister rs1, int32_t offset);
+  void Lhu(XRegister rd, XRegister rs1, int32_t offset);
+  void Lwu(XRegister rd, XRegister rs1, int32_t offset);
+
+  // Store instructions (RV32I+RV64I): opcode = 0x23, funct3 from 0x0 ~ 0x3
+  void Sb(XRegister rs2, XRegister rs1, int32_t offset);
+  void Sh(XRegister rs2, XRegister rs1, int32_t offset);
+  void Sw(XRegister rs2, XRegister rs1, int32_t offset);
+  void Sd(XRegister rs2, XRegister rs1, int32_t offset);
+
+  // IMM ALU instructions (RV32I): opcode = 0x13, funct3 from 0x0 ~ 0x7
+  void Addi(XRegister rd, XRegister rs1, int32_t imm12);
+  void Slti(XRegister rd, XRegister rs1, int32_t imm12);
+  void Sltiu(XRegister rd, XRegister rs1, int32_t imm12);
+  void Xori(XRegister rd, XRegister rs1, int32_t imm12);
+  void Ori(XRegister rd, XRegister rs1, int32_t imm12);
+  void Andi(XRegister rd, XRegister rs1, int32_t imm12);
+  void Slli(XRegister rd, XRegister rs1, int32_t shamt);
+  void Srli(XRegister rd, XRegister rs1, int32_t shamt);
+  void Srai(XRegister rd, XRegister rs1, int32_t shamt);
+
+  // ALU instructions (RV32I): opcode = 0x33, funct3 from 0x0 ~ 0x7
+  void Add(XRegister rd, XRegister rs1, XRegister rs2);
+  void Sub(XRegister rd, XRegister rs1, XRegister rs2);
+  void Slt(XRegister rd, XRegister rs1, XRegister rs2);
+  void Sltu(XRegister rd, XRegister rs1, XRegister rs2);
+  void Xor(XRegister rd, XRegister rs1, XRegister rs2);
+  void Or(XRegister rd, XRegister rs1, XRegister rs2);
+  void And(XRegister rd, XRegister rs1, XRegister rs2);
+  void Sll(XRegister rd, XRegister rs1, XRegister rs2);
+  void Srl(XRegister rd, XRegister rs1, XRegister rs2);
+  void Sra(XRegister rd, XRegister rs1, XRegister rs2);
+
+  // 32-bit Imm ALU instructions (RV64I): opcode = 0x1b, funct3 from 0x0, 0x1, 0x5
+  void Addiw(XRegister rd, XRegister rs1, int32_t imm12);
+  void Slliw(XRegister rd, XRegister rs1, int32_t shamt);
+  void Srliw(XRegister rd, XRegister rs1, int32_t shamt);
+  void Sraiw(XRegister rd, XRegister rs1, int32_t shamt);
+
+  // 32-bit ALU instructions (RV64I): opcode = 0x3b, funct3 from 0x0 ~ 0x7
+  void Addw(XRegister rd, XRegister rs1, XRegister rs2);
+  void Subw(XRegister rd, XRegister rs1, XRegister rs2);
+  void Sllw(XRegister rd, XRegister rs1, XRegister rs2);
+  void Srlw(XRegister rd, XRegister rs1, XRegister rs2);
+  void Sraw(XRegister rd, XRegister rs1, XRegister rs2);
+
+  // Environment call and breakpoint (RV32I), opcode = 0x73
+  void Ecall();
+  void Ebreak();
+
+  // Fence instruction (RV32I): opcode = 0xf, funct3 = 0
+  void Fence(uint32_t pred = kFenceDefault,
uint32_t succ = kFenceDefault); + void FenceTso(); + + // "Zifencei" Standard Extension, opcode = 0xf, funct3 = 1 + void FenceI(); + + // RV32M Standard Extension: opcode = 0x33, funct3 from 0x0 ~ 0x7 + void Mul(XRegister rd, XRegister rs1, XRegister rs2); + void Mulh(XRegister rd, XRegister rs1, XRegister rs2); + void Mulhsu(XRegister rd, XRegister rs1, XRegister rs2); + void Mulhu(XRegister rd, XRegister rs1, XRegister rs2); + void Div(XRegister rd, XRegister rs1, XRegister rs2); + void Divu(XRegister rd, XRegister rs1, XRegister rs2); + void Rem(XRegister rd, XRegister rs1, XRegister rs2); + void Remu(XRegister rd, XRegister rs1, XRegister rs2); + + // RV64M Standard Extension: opcode = 0x3b, funct3 0x0 and from 0x4 ~ 0x7 + void Mulw(XRegister rd, XRegister rs1, XRegister rs2); + void Divw(XRegister rd, XRegister rs1, XRegister rs2); + void Divuw(XRegister rd, XRegister rs1, XRegister rs2); + void Remw(XRegister rd, XRegister rs1, XRegister rs2); + void Remuw(XRegister rd, XRegister rs1, XRegister rs2); + + // RV32A/RV64A Standard Extension + void LrW(XRegister rd, XRegister rs1, AqRl aqrl); + void LrD(XRegister rd, XRegister rs1, AqRl aqrl); + void ScW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void ScD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoSwapW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoSwapD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoAddW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoAddD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoXorW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoXorD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoAndW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoAndD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoOrW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoOrD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMinW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMinD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMaxW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMaxD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMinuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMinuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMaxuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMaxuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + + // "Zicsr" Standard Extension, opcode = 0x73, funct3 from 0x1 ~ 0x3 and 0x5 ~ 0x7 + void Csrrw(XRegister rd, uint32_t csr, XRegister rs1); + void Csrrs(XRegister rd, uint32_t csr, XRegister rs1); + void Csrrc(XRegister rd, uint32_t csr, XRegister rs1); + void Csrrwi(XRegister rd, uint32_t csr, uint32_t uimm5); + void Csrrsi(XRegister rd, uint32_t csr, uint32_t uimm5); + void Csrrci(XRegister rd, uint32_t csr, uint32_t uimm5); + + // FP load/store instructions (RV32F+RV32D): opcode = 0x07, 0x27 + void FLw(FRegister rd, XRegister rs1, int32_t offset); + void FLd(FRegister rd, XRegister rs1, int32_t offset); + void FSw(FRegister rs2, XRegister rs1, int32_t offset); + void FSd(FRegister rs2, XRegister rs1, int32_t offset); + + // FP FMA instructions (RV32F+RV32D): opcode = 0x43, 0x47, 0x4b, 0x4f + void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, 
FPRoundingMode frm); + void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + + // FP FMA instruction helpers passing the default rounding mode. + void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FNMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FNMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FNMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FNMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + + // Simple FP instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b0XXXX0D + void FAddS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FAddD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FSubS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FSubD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FMulS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FMulD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FDivS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FDivD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FSqrtS(FRegister rd, FRegister rs1, FPRoundingMode frm); + void FSqrtD(FRegister rd, FRegister rs1, FPRoundingMode frm); + void FSgnjS(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjD(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjnS(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjnD(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjxS(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjxD(FRegister rd, FRegister rs1, FRegister rs2); + void FMinS(FRegister rd, FRegister rs1, FRegister rs2); + void FMinD(FRegister rd, FRegister rs1, FRegister rs2); + void FMaxS(FRegister rd, FRegister rs1, FRegister rs2); + void FMaxD(FRegister rd, FRegister rs1, FRegister rs2); + void FCvtSD(FRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtDS(FRegister rd, FRegister rs1, FPRoundingMode frm); + + // Simple FP instruction helpers passing the default rounding mode. 
+ void FAddS(FRegister rd, FRegister rs1, FRegister rs2) { + FAddS(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FAddD(FRegister rd, FRegister rs1, FRegister rs2) { + FAddD(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FSubS(FRegister rd, FRegister rs1, FRegister rs2) { + FSubS(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FSubD(FRegister rd, FRegister rs1, FRegister rs2) { + FSubD(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FMulS(FRegister rd, FRegister rs1, FRegister rs2) { + FMulS(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FMulD(FRegister rd, FRegister rs1, FRegister rs2) { + FMulD(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FDivS(FRegister rd, FRegister rs1, FRegister rs2) { + FDivS(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FDivD(FRegister rd, FRegister rs1, FRegister rs2) { + FDivD(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FSqrtS(FRegister rd, FRegister rs1) { + FSqrtS(rd, rs1, FPRoundingMode::kDefault); + } + void FSqrtD(FRegister rd, FRegister rs1) { + FSqrtD(rd, rs1, FPRoundingMode::kDefault); + } + void FCvtSD(FRegister rd, FRegister rs1) { + FCvtSD(rd, rs1, FPRoundingMode::kDefault); + } + void FCvtDS(FRegister rd, FRegister rs1) { + FCvtDS(rd, rs1, FPRoundingMode::kIgnored); + } + + // FP compare instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b101000D + void FEqS(XRegister rd, FRegister rs1, FRegister rs2); + void FEqD(XRegister rd, FRegister rs1, FRegister rs2); + void FLtS(XRegister rd, FRegister rs1, FRegister rs2); + void FLtD(XRegister rd, FRegister rs1, FRegister rs2); + void FLeS(XRegister rd, FRegister rs1, FRegister rs2); + void FLeD(XRegister rd, FRegister rs1, FRegister rs2); + + // FP conversion instructions (RV32F+RV32D+RV64F+RV64D): opcode = 0x53, funct7 = 0b110X00D + void FCvtWS(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtWD(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtWuS(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtWuD(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtLS(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtLD(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtLuS(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtLuD(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtSW(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtDW(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtSWu(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtDWu(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtSL(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtDL(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtSLu(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtDLu(FRegister rd, XRegister rs1, FPRoundingMode frm); + + // FP conversion instruction helpers passing the default rounding mode. 
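+  // Note that `FCvtDW`/`FCvtDWu` below pass `kIgnored`: converting a 32-bit integer to a
+  // double is always exact, so no rounding can occur (cf. `FPRoundingMode::kIgnored` above).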
+  void FCvtWS(XRegister rd, FRegister rs1) { FCvtWS(rd, rs1, FPRoundingMode::kDefault); }
+  void FCvtWD(XRegister rd, FRegister rs1) { FCvtWD(rd, rs1, FPRoundingMode::kDefault); }
+  void FCvtWuS(XRegister rd, FRegister rs1) { FCvtWuS(rd, rs1, FPRoundingMode::kDefault); }
+  void FCvtWuD(XRegister rd, FRegister rs1) { FCvtWuD(rd, rs1, FPRoundingMode::kDefault); }
+  void FCvtLS(XRegister rd, FRegister rs1) { FCvtLS(rd, rs1, FPRoundingMode::kDefault); }
+  void FCvtLD(XRegister rd, FRegister rs1) { FCvtLD(rd, rs1, FPRoundingMode::kDefault); }
+  void FCvtLuS(XRegister rd, FRegister rs1) { FCvtLuS(rd, rs1, FPRoundingMode::kDefault); }
+  void FCvtLuD(XRegister rd, FRegister rs1) { FCvtLuD(rd, rs1, FPRoundingMode::kDefault); }
+  void FCvtSW(FRegister rd, XRegister rs1) { FCvtSW(rd, rs1, FPRoundingMode::kDefault); }
+  void FCvtDW(FRegister rd, XRegister rs1) { FCvtDW(rd, rs1, FPRoundingMode::kIgnored); }
+  void FCvtSWu(FRegister rd, XRegister rs1) { FCvtSWu(rd, rs1, FPRoundingMode::kDefault); }
+  void FCvtDWu(FRegister rd, XRegister rs1) { FCvtDWu(rd, rs1, FPRoundingMode::kIgnored); }
+  void FCvtSL(FRegister rd, XRegister rs1) { FCvtSL(rd, rs1, FPRoundingMode::kDefault); }
+  void FCvtDL(FRegister rd, XRegister rs1) { FCvtDL(rd, rs1, FPRoundingMode::kDefault); }
+  void FCvtSLu(FRegister rd, XRegister rs1) { FCvtSLu(rd, rs1, FPRoundingMode::kDefault); }
+  void FCvtDLu(FRegister rd, XRegister rs1) { FCvtDLu(rd, rs1, FPRoundingMode::kDefault); }
+
+  // FP move instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x0, funct7 = 0b111X00D
+  void FMvXW(XRegister rd, FRegister rs1);
+  void FMvXD(XRegister rd, FRegister rs1);
+  void FMvWX(FRegister rd, XRegister rs1);
+  void FMvDX(FRegister rd, XRegister rs1);
+
+  // FP classify instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x1, funct7 = 0b111X00D
+  void FClassS(XRegister rd, FRegister rs1);
+  void FClassD(XRegister rd, FRegister rs1);
+
+  // "Zba" Standard Extension, opcode = 0x1b, 0x33 or 0x3b, funct3 and funct7 varies.
+  void AddUw(XRegister rd, XRegister rs1, XRegister rs2);
+  void Sh1Add(XRegister rd, XRegister rs1, XRegister rs2);
+  void Sh1AddUw(XRegister rd, XRegister rs1, XRegister rs2);
+  void Sh2Add(XRegister rd, XRegister rs1, XRegister rs2);
+  void Sh2AddUw(XRegister rd, XRegister rs1, XRegister rs2);
+  void Sh3Add(XRegister rd, XRegister rs1, XRegister rs2);
+  void Sh3AddUw(XRegister rd, XRegister rs1, XRegister rs2);
+  void SlliUw(XRegister rd, XRegister rs1, int32_t shamt);
+
+  // "Zbb" Standard Extension, opcode = 0x13, 0x1b or 0x33, funct3 and funct7 varies.
+  // Note: We do not support 32-bit sext.b, sext.h and zext.h from the Zbb extension.
+  // (Neither does the clang-r498229 assembler which we currently test against.)
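+  // Semantics reminder (RV64 Zbb): `Rev8` byte-swaps the full 64-bit register and `OrcB`
+  // maps every nonzero byte to 0xff and every zero byte to 0x00.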
+ void Andn(XRegister rd, XRegister rs1, XRegister rs2); + void Orn(XRegister rd, XRegister rs1, XRegister rs2); + void Xnor(XRegister rd, XRegister rs1, XRegister rs2); + void Clz(XRegister rd, XRegister rs1); + void Clzw(XRegister rd, XRegister rs1); + void Ctz(XRegister rd, XRegister rs1); + void Ctzw(XRegister rd, XRegister rs1); + void Cpop(XRegister rd, XRegister rs1); + void Cpopw(XRegister rd, XRegister rs1); + void Min(XRegister rd, XRegister rs1, XRegister rs2); + void Minu(XRegister rd, XRegister rs1, XRegister rs2); + void Max(XRegister rd, XRegister rs1, XRegister rs2); + void Maxu(XRegister rd, XRegister rs1, XRegister rs2); + void Rol(XRegister rd, XRegister rs1, XRegister rs2); + void Rolw(XRegister rd, XRegister rs1, XRegister rs2); + void Ror(XRegister rd, XRegister rs1, XRegister rs2); + void Rorw(XRegister rd, XRegister rs1, XRegister rs2); + void Rori(XRegister rd, XRegister rs1, int32_t shamt); + void Roriw(XRegister rd, XRegister rs1, int32_t shamt); + void OrcB(XRegister rd, XRegister rs1); + void Rev8(XRegister rd, XRegister rs1); + + ////////////////////////////// RV64 MACRO Instructions START /////////////////////////////// + // These pseudo instructions are from "RISC-V Assembly Programmer's Manual". + + void Nop(); + void Li(XRegister rd, int64_t imm); + void Mv(XRegister rd, XRegister rs); + void Not(XRegister rd, XRegister rs); + void Neg(XRegister rd, XRegister rs); + void NegW(XRegister rd, XRegister rs); + void SextB(XRegister rd, XRegister rs); + void SextH(XRegister rd, XRegister rs); + void SextW(XRegister rd, XRegister rs); + void ZextB(XRegister rd, XRegister rs); + void ZextH(XRegister rd, XRegister rs); + void ZextW(XRegister rd, XRegister rs); + void Seqz(XRegister rd, XRegister rs); + void Snez(XRegister rd, XRegister rs); + void Sltz(XRegister rd, XRegister rs); + void Sgtz(XRegister rd, XRegister rs); + void FMvS(FRegister rd, FRegister rs); + void FAbsS(FRegister rd, FRegister rs); + void FNegS(FRegister rd, FRegister rs); + void FMvD(FRegister rd, FRegister rs); + void FAbsD(FRegister rd, FRegister rs); + void FNegD(FRegister rd, FRegister rs); + + // Branch pseudo instructions + void Beqz(XRegister rs, int32_t offset); + void Bnez(XRegister rs, int32_t offset); + void Blez(XRegister rs, int32_t offset); + void Bgez(XRegister rs, int32_t offset); + void Bltz(XRegister rs, int32_t offset); + void Bgtz(XRegister rs, int32_t offset); + void Bgt(XRegister rs, XRegister rt, int32_t offset); + void Ble(XRegister rs, XRegister rt, int32_t offset); + void Bgtu(XRegister rs, XRegister rt, int32_t offset); + void Bleu(XRegister rs, XRegister rt, int32_t offset); + + // Jump pseudo instructions + void J(int32_t offset); + void Jal(int32_t offset); + void Jr(XRegister rs); + void Jalr(XRegister rs); + void Jalr(XRegister rd, XRegister rs); + void Ret(); + + // Pseudo instructions for accessing control and status registers + void RdCycle(XRegister rd); + void RdTime(XRegister rd); + void RdInstret(XRegister rd); + void Csrr(XRegister rd, uint32_t csr); + void Csrw(uint32_t csr, XRegister rs); + void Csrs(uint32_t csr, XRegister rs); + void Csrc(uint32_t csr, XRegister rs); + void Csrwi(uint32_t csr, uint32_t uimm5); + void Csrsi(uint32_t csr, uint32_t uimm5); + void Csrci(uint32_t csr, uint32_t uimm5); + + // Load/store macros for arbitrary 32-bit offsets. 
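+  // (A sketch: `Loadw(A0, S0, 0x12345)` can expand via `AdjustBaseAndOffset()` to
+  // LUI scratch, 0x12; ADD scratch, scratch, S0; LW A0, 0x345(scratch).)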
+ void Loadb(XRegister rd, XRegister rs1, int32_t offset); + void Loadh(XRegister rd, XRegister rs1, int32_t offset); + void Loadw(XRegister rd, XRegister rs1, int32_t offset); + void Loadd(XRegister rd, XRegister rs1, int32_t offset); + void Loadbu(XRegister rd, XRegister rs1, int32_t offset); + void Loadhu(XRegister rd, XRegister rs1, int32_t offset); + void Loadwu(XRegister rd, XRegister rs1, int32_t offset); + void Storeb(XRegister rs2, XRegister rs1, int32_t offset); + void Storeh(XRegister rs2, XRegister rs1, int32_t offset); + void Storew(XRegister rs2, XRegister rs1, int32_t offset); + void Stored(XRegister rs2, XRegister rs1, int32_t offset); + void FLoadw(FRegister rd, XRegister rs1, int32_t offset); + void FLoadd(FRegister rd, XRegister rs1, int32_t offset); + void FStorew(FRegister rs2, XRegister rs1, int32_t offset); + void FStored(FRegister rs2, XRegister rs1, int32_t offset); + + // Macros for loading constants. + void LoadConst32(XRegister rd, int32_t value); + void LoadConst64(XRegister rd, int64_t value); + + // Macros for adding constants. + void AddConst32(XRegister rd, XRegister rs1, int32_t value); + void AddConst64(XRegister rd, XRegister rs1, int64_t value); + + // Jumps and branches to a label. + void Beqz(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bnez(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Blez(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bgez(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bltz(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bgtz(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bgeu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Jal(XRegister rd, Riscv64Label* label, bool is_bare = false); + void J(Riscv64Label* label, bool is_bare = false); + void Jal(Riscv64Label* label, bool is_bare = false); + + // Literal load. + void Loadw(XRegister rd, Literal* literal); + void Loadwu(XRegister rd, Literal* literal); + void Loadd(XRegister rd, Literal* literal); + void FLoadw(FRegister rd, Literal* literal); + void FLoadd(FRegister rd, Literal* literal); + + // Illegal instruction that triggers SIGILL. + void Unimp(); + + /////////////////////////////// RV64 MACRO Instructions END /////////////////////////////// + + void Bind(Label* label) override { Bind(down_cast<Riscv64Label*>(label)); } + + void Jump([[maybe_unused]] Label* label) override { + UNIMPLEMENTED(FATAL) << "Do not use Jump for RISCV64"; + } + + void Jump(Riscv64Label* label) { + J(label); + } + + void Bind(Riscv64Label* label); + + // Load label address using PC-relative loads. + void LoadLabelAddress(XRegister rd, Riscv64Label* label); + + // Create a new literal with a given value. 
+  // NOTE: Use `Identity<>` to force the template parameter to be explicitly specified.
+  template <typename T>
+  Literal* NewLiteral(typename Identity<T>::type value) {
+    static_assert(std::is_integral<T>::value, "T must be an integral type.");
+    return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
+  }
+
+  // Create a new literal with the given data.
+  Literal* NewLiteral(size_t size, const uint8_t* data);
+
+  // Create a jump table for the given labels that will be emitted when finalizing.
+  // When the table is emitted, offsets will be relative to the location of the table.
+  // The table location is determined by the location of its label (the label precedes
+  // the table data) and should be loaded using LoadLabelAddress().
+  JumpTable* CreateJumpTable(ArenaVector<Riscv64Label*>&& labels);
+
+ public:
+  // Emit slow paths queued during assembly, promote short branches to long if needed,
+  // and emit branches.
+  void FinalizeCode() override;
+
+  // Returns the current location of a label.
+  //
+  // This function must be used instead of `Riscv64Label::GetPosition()`
+  // which returns assembler's internal data instead of an actual location.
+  //
+  // The location can change during branch fixup in `FinalizeCode()`. Before that,
+  // the location is not final and therefore not very useful to external users,
+  // so they should preferably retrieve the location only after `FinalizeCode()`.
+  uint32_t GetLabelLocation(const Riscv64Label* label) const;
+
+  // Get the final position of a label after local fixup based on the old position
+  // recorded before FinalizeCode().
+  uint32_t GetAdjustedPosition(uint32_t old_position);
+
+ private:
+  enum BranchCondition : uint8_t {
+    kCondEQ,
+    kCondNE,
+    kCondLT,
+    kCondGE,
+    kCondLE,
+    kCondGT,
+    kCondLTU,
+    kCondGEU,
+    kCondLEU,
+    kCondGTU,
+    kUncond,
+  };
+
+  // Note that PC-relative literal loads are handled as pseudo branches because they need
+  // to be emitted after branch relocation to use correct offsets.
+  class Branch {
+   public:
+    enum Type : uint8_t {
+      // TODO(riscv64): Support 16-bit instructions ("C" Standard Extension).
+
+      // Short branches (can be promoted to longer).
+      kCondBranch,
+      kUncondBranch,
+      kCall,
+      // Short branches (can't be promoted to longer).
+      kBareCondBranch,
+      kBareUncondBranch,
+      kBareCall,
+
+      // Medium branch (can be promoted to long).
+      kCondBranch21,
+
+      // Long branches.
+      kLongCondBranch,
+      kLongUncondBranch,
+      kLongCall,
+
+      // Label.
+      kLabel,
+
+      // Literals.
+      kLiteral,
+      kLiteralUnsigned,
+      kLiteralLong,
+      kLiteralFloat,
+      kLiteralDouble,
+    };
+
+    // Bit sizes of offsets defined as enums to minimize chance of typos.
+    enum OffsetBits {
+      kOffset13 = 13,
+      kOffset21 = 21,
+      kOffset32 = 32,
+    };
+
+    static constexpr uint32_t kUnresolved = 0xffffffff;  // Unresolved target_
+    static constexpr uint32_t kMaxBranchLength = 12;  // In bytes.
+
+    struct BranchInfo {
+      // Branch length in bytes.
+      uint32_t length;
+      // The offset in bytes of the PC used in the (only) PC-relative instruction from
+      // the start of the branch sequence. RISC-V always uses the address of the PC-relative
+      // instruction as the PC, so this is essentially the offset of that instruction.
+      uint32_t pc_offset;
+      // How large (in bits) a PC-relative offset can be for a given type of branch.
+      OffsetBits offset_size;
+    };
+    static const BranchInfo branch_info_[/* Type */];
+
+    // Unconditional branch or call.
+    Branch(uint32_t location, uint32_t target, XRegister rd, bool is_bare);
+    // Conditional branch.
+ Branch(uint32_t location, + uint32_t target, + BranchCondition condition, + XRegister lhs_reg, + XRegister rhs_reg, + bool is_bare); + // Label address or literal. + Branch(uint32_t location, uint32_t target, XRegister rd, Type label_or_literal_type); + Branch(uint32_t location, uint32_t target, FRegister rd, Type literal_type); + + // Some conditional branches with lhs = rhs are effectively NOPs, while some + // others are effectively unconditional. + static bool IsNop(BranchCondition condition, XRegister lhs, XRegister rhs); + static bool IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs); + + static BranchCondition OppositeCondition(BranchCondition cond); + + Type GetType() const; + BranchCondition GetCondition() const; + XRegister GetLeftRegister() const; + XRegister GetRightRegister() const; + FRegister GetFRegister() const; + uint32_t GetTarget() const; + uint32_t GetLocation() const; + uint32_t GetOldLocation() const; + uint32_t GetLength() const; + uint32_t GetOldLength() const; + uint32_t GetEndLocation() const; + uint32_t GetOldEndLocation() const; + bool IsBare() const; + bool IsResolved() const; + + // Returns the bit size of the signed offset that the branch instruction can handle. + OffsetBits GetOffsetSize() const; + + // Calculates the distance between two byte locations in the assembler buffer and + // returns the number of bits needed to represent the distance as a signed integer. + static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target); + + // Resolve a branch when the target is known. + void Resolve(uint32_t target); + + // Relocate a branch by a given delta if needed due to expansion of this or another + // branch at a given location by this delta (just changes location_ and target_). + void Relocate(uint32_t expand_location, uint32_t delta); + + // If necessary, updates the type by promoting a short branch to a longer branch + // based on the branch location and target. Returns the amount (in bytes) by + // which the branch size has increased. + uint32_t PromoteIfNeeded(); + + // Returns the offset into assembler buffer that shall be used as the base PC for + // offset calculation. RISC-V always uses the address of the PC-relative instruction + // as the PC, so this is essentially the location of that instruction. + uint32_t GetOffsetLocation() const; + + // Calculates and returns the offset ready for encoding in the branch instruction(s). + int32_t GetOffset() const; + + private: + // Completes branch construction by determining and recording its type. + void InitializeType(Type initial_type); + // Helper for the above. + void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type, Type longest_type); + + uint32_t old_location_; // Offset into assembler buffer in bytes. + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + + XRegister lhs_reg_; // Left-hand side register in conditional branches or + // destination register in calls or literals. + XRegister rhs_reg_; // Right-hand side register in conditional branches. + FRegister freg_; // Destination register in FP literals. + BranchCondition condition_; // Condition for conditional branches. + + Type type_; // Current type of the branch. + Type old_type_; // Initial type of the branch. + }; + + // Branch and literal fixup. 
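+  // (One plausible order, consistent with the .cc code above; the actual `FinalizeCode()`
+  // body is not part of this hunk: reserve jump table space and emit literals while still
+  // appending, then `PromoteBranches()`, then overwrite placeholders via `EmitBranches()`
+  // and `EmitJumpTables()`, and finally `PatchCFI()`.)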
+ + void EmitBcond(BranchCondition cond, XRegister rs, XRegister rt, int32_t offset); + void EmitBranch(Branch* branch); + void EmitBranches(); + void EmitJumpTables(); + void EmitLiterals(); + + void FinalizeLabeledBranch(Riscv64Label* label); + void Bcond(Riscv64Label* label, + bool is_bare, + BranchCondition condition, + XRegister lhs, + XRegister rhs); + void Buncond(Riscv64Label* label, XRegister rd, bool is_bare); + template <typename XRegisterOrFRegister> + void LoadLiteral(Literal* literal, XRegisterOrFRegister rd, Branch::Type literal_type); + + Branch* GetBranch(uint32_t branch_id); + const Branch* GetBranch(uint32_t branch_id) const; + + void ReserveJumpTableSpace(); + void PromoteBranches(); + void PatchCFI(); + + // Emit data (e.g. encoded instruction or immediate) to the instruction stream. + void Emit(uint32_t value); + + // Adjust base register and offset if needed for load/store with a large offset. + void AdjustBaseAndOffset(XRegister& base, int32_t& offset, ScratchRegisterScope& srs); + + // Helper templates for loads/stores with 32-bit offsets. + template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> + void LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset); + template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> + void StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset); + template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> + void FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset); + template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> + void FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset); + + // Implementation helper for `Li()`, `LoadConst32()` and `LoadConst64()`. + void LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp); + + // Emit helpers. + + // I-type instruction: + // + // 31 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . . . . . . . | . . . . | . . | . . . . | . . . . . . ] + // [ imm11:0 rs1 funct3 rd opcode ] + // ----------------------------------------------------------------- + template <typename Reg1, typename Reg2> + void EmitI(int32_t imm12, Reg1 rs1, uint32_t funct3, Reg2 rd, uint32_t opcode) { + DCHECK(IsInt<12>(imm12)) << imm12; + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = static_cast<uint32_t>(imm12) << 20 | static_cast<uint32_t>(rs1) << 15 | + funct3 << 12 | static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + // R-type instruction: + // + // 31 25 24 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . 
] + // [ funct7 rs2 rs1 funct3 rd opcode ] + // ----------------------------------------------------------------- + template <typename Reg1, typename Reg2, typename Reg3> + void EmitR(uint32_t funct7, Reg1 rs2, Reg2 rs1, uint32_t funct3, Reg3 rd, uint32_t opcode) { + DCHECK(IsUint<7>(funct7)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs2))); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = funct7 << 25 | static_cast<uint32_t>(rs2) << 20 | + static_cast<uint32_t>(rs1) << 15 | funct3 << 12 | + static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + // R-type instruction variant for floating-point fused multiply-add/sub (F[N]MADD/ F[N]MSUB): + // + // 31 27 25 24 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . | . | . . . . | . . . . | . . | . . . . | . . . . . . ] + // [ rs3 fmt rs2 rs1 funct3 rd opcode ] + // ----------------------------------------------------------------- + template <typename Reg1, typename Reg2, typename Reg3, typename Reg4> + void EmitR4( + Reg1 rs3, uint32_t fmt, Reg2 rs2, Reg3 rs1, uint32_t funct3, Reg4 rd, uint32_t opcode) { + DCHECK(IsUint<5>(static_cast<uint32_t>(rs3))); + DCHECK(IsUint<2>(fmt)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs2))); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = static_cast<uint32_t>(rs3) << 27 | static_cast<uint32_t>(fmt) << 25 | + static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 | + static_cast<uint32_t>(funct3) << 12 | static_cast<uint32_t>(rd) << 7 | + opcode; + Emit(encoding); + } + + // S-type instruction: + // + // 31 25 24 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . ] + // [ imm11:5 rs2 rs1 funct3 imm4:0 opcode ] + // ----------------------------------------------------------------- + template <typename Reg1, typename Reg2> + void EmitS(int32_t imm12, Reg1 rs2, Reg2 rs1, uint32_t funct3, uint32_t opcode) { + DCHECK(IsInt<12>(imm12)) << imm12; + DCHECK(IsUint<5>(static_cast<uint32_t>(rs2))); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = (static_cast<uint32_t>(imm12) & 0xFE0) << 20 | + static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 | + static_cast<uint32_t>(funct3) << 12 | + (static_cast<uint32_t>(imm12) & 0x1F) << 7 | opcode; + Emit(encoding); + } + + // I-type instruction variant for shifts (SLLI / SRLI / SRAI): + // + // 31 26 25 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . | . . . . . | . . . . | . . | . . . . | . . . . . . 
] + // [ imm11:6 imm5:0(shamt) rs1 funct3 rd opcode ] + // ----------------------------------------------------------------- + void EmitI6(uint32_t funct6, + uint32_t imm6, + XRegister rs1, + uint32_t funct3, + XRegister rd, + uint32_t opcode) { + DCHECK(IsUint<6>(funct6)); + DCHECK(IsUint<6>(imm6)) << imm6; + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = funct6 << 26 | static_cast<uint32_t>(imm6) << 20 | + static_cast<uint32_t>(rs1) << 15 | funct3 << 12 | + static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + // B-type instruction: + // + // 31 30 25 24 20 19 15 14 12 11 8 7 6 0 + // ----------------------------------------------------------------- + // [ | . . . . . | . . . . | . . . . | . . | . . . | | . . . . . . ] + // imm12 imm11:5 rs2 rs1 funct3 imm4:1 imm11 opcode ] + // ----------------------------------------------------------------- + void EmitB(int32_t offset, XRegister rs2, XRegister rs1, uint32_t funct3, uint32_t opcode) { + DCHECK_ALIGNED(offset, 2); + DCHECK(IsInt<13>(offset)) << offset; + DCHECK(IsUint<5>(static_cast<uint32_t>(rs2))); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<7>(opcode)); + uint32_t imm12 = (static_cast<uint32_t>(offset) >> 1) & 0xfffu; + uint32_t encoding = (imm12 & 0x800u) << (31 - 11) | (imm12 & 0x03f0u) << (25 - 4) | + static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 | + static_cast<uint32_t>(funct3) << 12 | + (imm12 & 0xfu) << 8 | (imm12 & 0x400u) >> (10 - 7) | opcode; + Emit(encoding); + } + + // U-type instruction: + // + // 31 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . . . . . . . . . . . . . . . | . . . . | . . . . . . ] + // [ imm31:12 rd opcode ] + // ----------------------------------------------------------------- + void EmitU(uint32_t imm20, XRegister rd, uint32_t opcode) { + CHECK(IsUint<20>(imm20)) << imm20; + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = imm20 << 12 | static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + // J-type instruction: + // + // 31 30 21 19 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ | . . . . . . . . . | | . . . . . . . | . . . . | . . . . . . ] + // imm20 imm10:1 imm11 imm19:12 rd opcode ] + // ----------------------------------------------------------------- + void EmitJ(int32_t offset, XRegister rd, uint32_t opcode) { + DCHECK_ALIGNED(offset, 2); + CHECK(IsInt<21>(offset)) << offset; + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t imm20 = (static_cast<uint32_t>(offset) >> 1) & 0xfffffu; + uint32_t encoding = (imm20 & 0x80000u) << (31 - 19) | (imm20 & 0x03ffu) << 21 | + (imm20 & 0x400u) << (20 - 10) | (imm20 & 0x7f800u) << (12 - 11) | + static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + ArenaVector<Branch> branches_; + + // For checking that we finalize the code only once. + bool finalized_; + + // Whether appending instructions at the end of the buffer or overwriting the existing ones. + bool overwriting_; + // The current overwrite location. + uint32_t overwrite_location_; + + // Use `std::deque<>` for literal labels to allow insertions at the end + // without invalidating pointers and references to existing elements. 
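+ // (This pointer stability is presumably what keeps the `Literal*` returned by
+ // `NewLiteral()` valid while further literals are added; a vector could relocate
+ // its elements on growth.)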
+ ArenaDeque<Literal> literals_;
+ ArenaDeque<Literal> long_literals_; // 64-bit literals separated for alignment reasons.
+
+ // Jump table list.
+ ArenaDeque<JumpTable> jump_tables_;
+
+ // Data for `GetAdjustedPosition()`, see the description there.
+ uint32_t last_position_adjustment_;
+ uint32_t last_old_position_;
+ uint32_t last_branch_id_;
+
+ uint32_t available_scratch_core_registers_;
+ uint32_t available_scratch_fp_registers_;
+
+ static constexpr uint32_t kXlen = 64;
+
+ friend class ScratchRegisterScope;
+
+ DISALLOW_COPY_AND_ASSIGN(Riscv64Assembler);
+};
+
+class ScratchRegisterScope {
+ public:
+ explicit ScratchRegisterScope(Riscv64Assembler* assembler)
+ : assembler_(assembler),
+ old_available_scratch_core_registers_(assembler->available_scratch_core_registers_),
+ old_available_scratch_fp_registers_(assembler->available_scratch_fp_registers_) {}
+
+ ~ScratchRegisterScope() {
+ assembler_->available_scratch_core_registers_ = old_available_scratch_core_registers_;
+ assembler_->available_scratch_fp_registers_ = old_available_scratch_fp_registers_;
+ }
+
+ // Allocate a scratch `XRegister`. There must be an available register to allocate.
+ XRegister AllocateXRegister() {
+ CHECK_NE(assembler_->available_scratch_core_registers_, 0u);
+ // Allocate the highest available scratch register (prefer TMP(T6) over TMP2(T5)).
+ uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_core_registers_) - 1u) -
+ CLZ(assembler_->available_scratch_core_registers_);
+ assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ return enum_cast<XRegister>(reg_num);
+ }
+
+ // Free a previously unavailable core register for use as a scratch register.
+ // This can be an arbitrary register, not necessarily the usual `TMP` or `TMP2`.
+ void FreeXRegister(XRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ CHECK_EQ((1u << reg_num) & assembler_->available_scratch_core_registers_, 0u);
+ assembler_->available_scratch_core_registers_ |= 1u << reg_num;
+ }
+
+ // The number of available scratch core registers.
+ size_t AvailableXRegisters() {
+ return POPCOUNT(assembler_->available_scratch_core_registers_);
+ }
+
+ // Make sure a core register is available for use as a scratch register.
+ void IncludeXRegister(XRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ assembler_->available_scratch_core_registers_ |= 1u << reg_num;
+ }
+
+ // Make sure a core register is not available for use as a scratch register.
+ void ExcludeXRegister(XRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
+ }
+
+ // Allocate a scratch `FRegister`. There must be an available register to allocate.
+ FRegister AllocateFRegister() {
+ CHECK_NE(assembler_->available_scratch_fp_registers_, 0u);
+ // Allocate the highest available scratch register (same as for core registers).
+ uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_fp_registers_) - 1u) -
+ CLZ(assembler_->available_scratch_fp_registers_);
+ assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ return enum_cast<FRegister>(reg_num);
+ }
+
+ // Free a previously unavailable FP register for use as a scratch register.
+ // This can be an arbitrary register, not necessarily the usual `FTMP`.
+ void FreeFRegister(FRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ CHECK_EQ((1u << reg_num) & assembler_->available_scratch_fp_registers_, 0u);
+ assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
+ }
+
+ // The number of available scratch FP registers.
+ size_t AvailableFRegisters() {
+ return POPCOUNT(assembler_->available_scratch_fp_registers_);
+ }
+
+ // Make sure an FP register is available for use as a scratch register.
+ void IncludeFRegister(FRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
+ }
+
+ // Make sure an FP register is not available for use as a scratch register.
+ void ExcludeFRegister(FRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
+ }
+
+ private:
+ Riscv64Assembler* const assembler_;
+ const uint32_t old_available_scratch_core_registers_;
+ const uint32_t old_available_scratch_fp_registers_;
+
+ DISALLOW_COPY_AND_ASSIGN(ScratchRegisterScope);
+};
+
+} // namespace riscv64
+} // namespace art
+
+#endif // ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
diff --git a/compiler/utils/riscv64/assembler_riscv64_test.cc b/compiler/utils/riscv64/assembler_riscv64_test.cc
new file mode 100644
index 0000000000..21baa74323
--- /dev/null
+++ b/compiler/utils/riscv64/assembler_riscv64_test.cc
@@ -0,0 +1,3002 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "assembler_riscv64.h" + +#include <inttypes.h> + +#include <map> + +#include "base/bit_utils.h" +#include "utils/assembler_test.h" + +#define __ GetAssembler()-> + +namespace art HIDDEN { +namespace riscv64 { + +struct RISCV64CpuRegisterCompare { + bool operator()(const XRegister& a, const XRegister& b) const { return a < b; } +}; + +class AssemblerRISCV64Test : public AssemblerTest<Riscv64Assembler, + Riscv64Label, + XRegister, + FRegister, + int32_t> { + public: + using Base = AssemblerTest<Riscv64Assembler, + Riscv64Label, + XRegister, + FRegister, + int32_t>; + + AssemblerRISCV64Test() + : instruction_set_features_(Riscv64InstructionSetFeatures::FromVariant("default", nullptr)) {} + + protected: + Riscv64Assembler* CreateAssembler(ArenaAllocator* allocator) override { + return new (allocator) Riscv64Assembler(allocator, instruction_set_features_.get()); + } + + InstructionSet GetIsa() override { return InstructionSet::kRiscv64; } + + // Clang's assembler takes advantage of certain extensions for emitting constants with `li` + // but our assembler does not. For now, we use a simple `-march` to avoid the divergence. + // TODO(riscv64): Implement these more efficient patterns in assembler. + void SetUseSimpleMarch(bool value) { + use_simple_march_ = value; + } + + std::vector<std::string> GetAssemblerCommand() override { + std::vector<std::string> result = Base::GetAssemblerCommand(); + if (use_simple_march_) { + auto it = std::find_if(result.begin(), + result.end(), + [](const std::string& s) { return StartsWith(s, "-march="); }); + CHECK(it != result.end()); + *it = "-march=rv64imafd"; + } + return result; + } + + std::vector<std::string> GetDisassemblerCommand() override { + std::vector<std::string> result = Base::GetDisassemblerCommand(); + if (use_simple_march_) { + auto it = std::find_if(result.begin(), + result.end(), + [](const std::string& s) { return StartsWith(s, "--mattr="); }); + CHECK(it != result.end()); + *it = "--mattr=+F,+D,+A"; + } + return result; + } + + void SetUpHelpers() override { + if (secondary_register_names_.empty()) { + secondary_register_names_.emplace(Zero, "zero"); + secondary_register_names_.emplace(RA, "ra"); + secondary_register_names_.emplace(SP, "sp"); + secondary_register_names_.emplace(GP, "gp"); + secondary_register_names_.emplace(TP, "tp"); + secondary_register_names_.emplace(T0, "t0"); + secondary_register_names_.emplace(T1, "t1"); + secondary_register_names_.emplace(T2, "t2"); + secondary_register_names_.emplace(S0, "s0"); // s0/fp + secondary_register_names_.emplace(S1, "s1"); + secondary_register_names_.emplace(A0, "a0"); + secondary_register_names_.emplace(A1, "a1"); + secondary_register_names_.emplace(A2, "a2"); + secondary_register_names_.emplace(A3, "a3"); + secondary_register_names_.emplace(A4, "a4"); + secondary_register_names_.emplace(A5, "a5"); + secondary_register_names_.emplace(A6, "a6"); + secondary_register_names_.emplace(A7, "a7"); + secondary_register_names_.emplace(S2, "s2"); + secondary_register_names_.emplace(S3, "s3"); + secondary_register_names_.emplace(S4, "s4"); + secondary_register_names_.emplace(S5, "s5"); + secondary_register_names_.emplace(S6, "s6"); + secondary_register_names_.emplace(S7, "s7"); + secondary_register_names_.emplace(S8, "s8"); + secondary_register_names_.emplace(S9, "s9"); + secondary_register_names_.emplace(S10, "s10"); + secondary_register_names_.emplace(S11, "s11"); + secondary_register_names_.emplace(T3, "t3"); + secondary_register_names_.emplace(T4, "t4"); + 
secondary_register_names_.emplace(T5, "t5"); + secondary_register_names_.emplace(T6, "t6"); + } + } + + void TearDown() override { + AssemblerTest::TearDown(); + } + + std::vector<Riscv64Label> GetAddresses() override { + UNIMPLEMENTED(FATAL) << "Feature not implemented yet"; + UNREACHABLE(); + } + + ArrayRef<const XRegister> GetRegisters() override { + static constexpr XRegister kXRegisters[] = { + Zero, + RA, + SP, + GP, + TP, + T0, + T1, + T2, + S0, + S1, + A0, + A1, + A2, + A3, + A4, + A5, + A6, + A7, + S2, + S3, + S4, + S5, + S6, + S7, + S8, + S9, + S10, + S11, + T3, + T4, + T5, + T6, + }; + return ArrayRef<const XRegister>(kXRegisters); + } + + ArrayRef<const FRegister> GetFPRegisters() override { + static constexpr FRegister kFRegisters[] = { + FT0, + FT1, + FT2, + FT3, + FT4, + FT5, + FT6, + FT7, + FS0, + FS1, + FA0, + FA1, + FA2, + FA3, + FA4, + FA5, + FA6, + FA7, + FS2, + FS3, + FS4, + FS5, + FS6, + FS7, + FS8, + FS9, + FS10, + FS11, + FT8, + FT9, + FT10, + FT11, + }; + return ArrayRef<const FRegister>(kFRegisters); + } + + std::string GetSecondaryRegisterName(const XRegister& reg) override { + CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end()); + return secondary_register_names_[reg]; + } + + int32_t CreateImmediate(int64_t imm_value) override { + return dchecked_integral_cast<int32_t>(imm_value); + } + + template <typename Emit> + std::string RepeatInsn(size_t count, const std::string& insn, Emit&& emit) { + std::string result; + for (; count != 0u; --count) { + result += insn; + emit(); + } + return result; + } + + std::string EmitNops(size_t size) { + // TODO(riscv64): Support "C" Standard Extension. + DCHECK_ALIGNED(size, sizeof(uint32_t)); + const size_t num_nops = size / sizeof(uint32_t); + return RepeatInsn(num_nops, "nop\n", [&]() { __ Nop(); }); + } + + template <typename EmitLoadConst> + void TestLoadConst64(const std::string& test_name, + bool can_use_tmp, + EmitLoadConst&& emit_load_const) { + std::string expected; + // Test standard immediates. Unlike other instructions, `Li()` accepts an `int64_t` but + // this is unsupported by `CreateImmediate()`, so we cannot use `RepeatRIb()` for these. + // Note: This `CreateImmediateValuesBits()` call does not produce any values where + // `LoadConst64()` would emit different code from `Li()`. + for (int64_t value : CreateImmediateValuesBits(64, /*as_uint=*/ false)) { + emit_load_const(A0, value); + expected += "li a0, " + std::to_string(value) + "\n"; + } + // Test various registers with a few small values. + // (Even Zero is an accepted register even if that does not really load the requested value.) + for (XRegister reg : GetRegisters()) { + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(reg); + std::string rd = GetRegisterName(reg); + emit_load_const(reg, -1); + expected += "li " + rd + ", -1\n"; + emit_load_const(reg, 0); + expected += "li " + rd + ", 0\n"; + emit_load_const(reg, 1); + expected += "li " + rd + ", 1\n"; + } + // Test some significant values. Some may just repeat the tests above but other values + // show some complex patterns, even exposing a value where clang (and therefore also this + // assembler) does not generate the shortest sequence. + // For the following values, `LoadConst64()` emits the same code as `Li()`. + int64_t test_values1[] = { + // Small values, either ADDI, ADDI+SLLI, LUI, or LUI+ADDIW. + // The ADDI+LUI is presumably used to allow shorter code for RV64C. 
+ -4097, -4096, -4095, -2176, -2049, -2048, -2047, -1025, -1024, -1023, -2, -1, + 0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049, 2176, 4095, 4096, 4097, + // Just below std::numeric_limits<int32_t>::min() + INT64_C(-0x80000001), // LUI+ADDI + INT64_C(-0x80000800), // LUI+ADDI + INT64_C(-0x80000801), // LUI+ADDIW+SLLI+ADDI; LUI+ADDI+ADDI would be shorter. + INT64_C(-0x80000800123), // LUI+ADDIW+SLLI+ADDI + INT64_C(0x0123450000000123), // LUI+SLLI+ADDI + INT64_C(-0x7654300000000123), // LUI+SLLI+ADDI + INT64_C(0x0fffffffffff0000), // LUI+SRLI + INT64_C(0x0ffffffffffff000), // LUI+SRLI + INT64_C(0x0ffffffffffff010), // LUI+ADDIW+SRLI + INT64_C(0x0fffffffffffff10), // ADDI+SLLI+ADDI; LUI+ADDIW+SRLI would be same length. + INT64_C(0x0fffffffffffff80), // ADDI+SRLI + INT64_C(0x0ffffffff7ffff80), // LUI+ADDI+SRLI + INT64_C(0x0123450000001235), // LUI+SLLI+ADDI+SLLI+ADDI + INT64_C(0x0123450000001234), // LUI+SLLI+ADDI+SLLI + INT64_C(0x0000000fff808010), // LUI+SLLI+SRLI + INT64_C(0x00000000fff80801), // LUI+SLLI+SRLI + INT64_C(0x00000000ffffffff), // ADDI+SRLI + INT64_C(0x00000001ffffffff), // ADDI+SRLI + INT64_C(0x00000003ffffffff), // ADDI+SRLI + INT64_C(0x00000000ffc00801), // LUI+ADDIW+SLLI+ADDI + INT64_C(0x00000001fffff7fe), // ADDI+SLLI+SRLI + }; + for (int64_t value : test_values1) { + emit_load_const(A0, value); + expected += "li a0, " + std::to_string(value) + "\n"; + } + // For the following values, `LoadConst64()` emits different code than `Li()`. + std::pair<int64_t, const char*> test_values2[] = { + // Li: LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI + // LoadConst: LUI+ADDIW+LUI+ADDIW+SLLI+ADD (using TMP) + { INT64_C(0x1234567812345678), + "li {reg1}, 0x12345678 / 8\n" // Trailing zero bits in high word are handled by SLLI. + "li {reg2}, 0x12345678\n" + "slli {reg1}, {reg1}, 32 + 3\n" + "add {reg1}, {reg1}, {reg2}\n" }, + { INT64_C(0x1234567887654321), + "li {reg1}, 0x12345678 + 1\n" // One higher to compensate for negative TMP. + "li {reg2}, 0x87654321 - 0x100000000\n" + "slli {reg1}, {reg1}, 32\n" + "add {reg1}, {reg1}, {reg2}\n" }, + { INT64_C(-0x1234567887654321), + "li {reg1}, -0x12345678 - 1\n" // High 32 bits of the constant. + "li {reg2}, 0x100000000 - 0x87654321\n" // Low 32 bits of the constant. 
+ "slli {reg1}, {reg1}, 32\n" + "add {reg1}, {reg1}, {reg2}\n" }, + + // Li: LUI+SLLI+ADDI+SLLI+ADDI+SLLI + // LoadConst: LUI+LUI+SLLI+ADD (using TMP) + { INT64_C(0x1234500012345000), + "lui {reg1}, 0x12345\n" + "lui {reg2}, 0x12345\n" + "slli {reg1}, {reg1}, 44 - 12\n" + "add {reg1}, {reg1}, {reg2}\n" }, + { INT64_C(0x0123450012345000), + "lui {reg1}, 0x12345\n" + "lui {reg2}, 0x12345\n" + "slli {reg1}, {reg1}, 40 - 12\n" + "add {reg1}, {reg1}, {reg2}\n" }, + + // Li: LUI+ADDIW+SLLI+ADDI+SLLI+ADDI + // LoadConst: LUI+LUI+ADDIW+SLLI+ADD (using TMP) + { INT64_C(0x0001234512345678), + "lui {reg1}, 0x12345\n" + "li {reg2}, 0x12345678\n" + "slli {reg1}, {reg1}, 32 - 12\n" + "add {reg1}, {reg1}, {reg2}\n" }, + { INT64_C(0x0012345012345678), + "lui {reg1}, 0x12345\n" + "li {reg2}, 0x12345678\n" + "slli {reg1}, {reg1}, 36 - 12\n" + "add {reg1}, {reg1}, {reg2}\n" }, + }; + for (auto [value, fmt] : test_values2) { + emit_load_const(A0, value); + if (can_use_tmp) { + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetRegisterName(A0), &base); + ReplaceReg(REG2_TOKEN, GetRegisterName(TMP), &base); + expected += base; + } else { + expected += "li a0, " + std::to_string(value) + "\n"; + } + } + + DriverStr(expected, test_name); + } + + auto GetPrintBcond() { + return [](const std::string& cond, + [[maybe_unused]] const std::string& opposite_cond, + const std::string& args, + const std::string& target) { + return "b" + cond + args + ", " + target + "\n"; + }; + } + + auto GetPrintBcondOppositeAndJ(const std::string& skip_label) { + return [=]([[maybe_unused]] const std::string& cond, + const std::string& opposite_cond, + const std::string& args, + const std::string& target) { + return "b" + opposite_cond + args + ", " + skip_label + "f\n" + + "j " + target + "\n" + + skip_label + ":\n"; + }; + } + + auto GetPrintBcondOppositeAndTail(const std::string& skip_label, const std::string& base_label) { + return [=]([[maybe_unused]] const std::string& cond, + const std::string& opposite_cond, + const std::string& args, + const std::string& target) { + return "b" + opposite_cond + args + ", " + skip_label + "f\n" + + base_label + ":\n" + + "auipc t6, %pcrel_hi(" + target + ")\n" + + "jalr x0, %pcrel_lo(" + base_label + "b)(t6)\n" + + skip_label + ":\n"; + }; + } + + // Helper function for basic tests that all branch conditions map to the correct opcodes, + // whether with branch expansion (a conditional branch with opposite condition over an + // unconditional branch) or without. 
+ template <typename PrintBcond> + std::string EmitBcondForAllConditions(Riscv64Label* label, + const std::string& target, + PrintBcond&& print_bcond, + bool is_bare) { + XRegister rs = A0; + __ Beqz(rs, label, is_bare); + __ Bnez(rs, label, is_bare); + __ Blez(rs, label, is_bare); + __ Bgez(rs, label, is_bare); + __ Bltz(rs, label, is_bare); + __ Bgtz(rs, label, is_bare); + XRegister rt = A1; + __ Beq(rs, rt, label, is_bare); + __ Bne(rs, rt, label, is_bare); + __ Ble(rs, rt, label, is_bare); + __ Bge(rs, rt, label, is_bare); + __ Blt(rs, rt, label, is_bare); + __ Bgt(rs, rt, label, is_bare); + __ Bleu(rs, rt, label, is_bare); + __ Bgeu(rs, rt, label, is_bare); + __ Bltu(rs, rt, label, is_bare); + __ Bgtu(rs, rt, label, is_bare); + + return + print_bcond("eq", "ne", "z a0", target) + + print_bcond("ne", "eq", "z a0", target) + + print_bcond("le", "gt", "z a0", target) + + print_bcond("ge", "lt", "z a0", target) + + print_bcond("lt", "ge", "z a0", target) + + print_bcond("gt", "le", "z a0", target) + + print_bcond("eq", "ne", " a0, a1", target) + + print_bcond("ne", "eq", " a0, a1", target) + + print_bcond("le", "gt", " a0, a1", target) + + print_bcond("ge", "lt", " a0, a1", target) + + print_bcond("lt", "ge", " a0, a1", target) + + print_bcond("gt", "le", " a0, a1", target) + + print_bcond("leu", "gtu", " a0, a1", target) + + print_bcond("geu", "ltu", " a0, a1", target) + + print_bcond("ltu", "geu", " a0, a1", target) + + print_bcond("gtu", "leu", " a0, a1", target); + } + + // Test Bcond for forward branches with all conditions. + // The gap must be such that either all branches expand, or none does. + template <typename PrintBcond> + void TestBcondForward(const std::string& test_name, + size_t gap_size, + const std::string& target_label, + PrintBcond&& print_bcond, + bool is_bare = false) { + std::string expected; + Riscv64Label label; + expected += EmitBcondForAllConditions(&label, target_label + "f", print_bcond, is_bare); + expected += EmitNops(gap_size); + __ Bind(&label); + expected += target_label + ":\n"; + DriverStr(expected, test_name); + } + + // Test Bcond for backward branches with all conditions. + // The gap must be such that either all branches expand, or none does. + template <typename PrintBcond> + void TestBcondBackward(const std::string& test_name, + size_t gap_size, + const std::string& target_label, + PrintBcond&& print_bcond, + bool is_bare = false) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += target_label + ":\n"; + expected += EmitNops(gap_size); + expected += EmitBcondForAllConditions(&label, target_label + "b", print_bcond, is_bare); + DriverStr(expected, test_name); + } + + size_t MaxOffset13BackwardDistance() { + return 4 * KB; + } + + size_t MaxOffset13ForwardDistance() { + // TODO(riscv64): Support "C" Standard Extension, max forward distance 4KiB - 2. + return 4 * KB - 4; + } + + size_t MaxOffset21BackwardDistance() { + return 1 * MB; + } + + size_t MaxOffset21ForwardDistance() { + // TODO(riscv64): Support "C" Standard Extension, max forward distance 1MiB - 2. 
+ return 1 * MB - 4; + } + + template <typename PrintBcond> + void TestBeqA0A1Forward(const std::string& test_name, + size_t nops_size, + const std::string& target_label, + PrintBcond&& print_bcond, + bool is_bare = false) { + std::string expected; + Riscv64Label label; + __ Beq(A0, A1, &label, is_bare); + expected += print_bcond("eq", "ne", " a0, a1", target_label + "f"); + expected += EmitNops(nops_size); + __ Bind(&label); + expected += target_label + ":\n"; + DriverStr(expected, test_name); + } + + template <typename PrintBcond> + void TestBeqA0A1Backward(const std::string& test_name, + size_t nops_size, + const std::string& target_label, + PrintBcond&& print_bcond, + bool is_bare = false) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += target_label + ":\n"; + expected += EmitNops(nops_size); + __ Beq(A0, A1, &label, is_bare); + expected += print_bcond("eq", "ne", " a0, a1", target_label + "b"); + DriverStr(expected, test_name); + } + + // Test a branch setup where expanding one branch causes expanding another branch + // which causes expanding another branch, etc. The argument `cascade` determines + // whether we push the first branch to expand, or not. + template <typename PrintBcond> + void TestBeqA0A1MaybeCascade(const std::string& test_name, + bool cascade, + PrintBcond&& print_bcond) { + const size_t kNumBeqs = MaxOffset13ForwardDistance() / sizeof(uint32_t) / 2u; + auto label_name = [](size_t i) { return ".L" + std::to_string(i); }; + + std::string expected; + std::vector<Riscv64Label> labels(kNumBeqs); + for (size_t i = 0; i != kNumBeqs; ++i) { + __ Beq(A0, A1, &labels[i]); + expected += print_bcond("eq", "ne", " a0, a1", label_name(i)); + } + if (cascade) { + expected += EmitNops(sizeof(uint32_t)); + } + for (size_t i = 0; i != kNumBeqs; ++i) { + expected += EmitNops(2 * sizeof(uint32_t)); + __ Bind(&labels[i]); + expected += label_name(i) + ":\n"; + } + DriverStr(expected, test_name); + } + + auto GetPrintJalRd() { + return [=](XRegister rd, const std::string& target) { + std::string rd_name = GetRegisterName(rd); + return "jal " + rd_name + ", " + target + "\n"; + }; + } + + auto GetPrintCallRd(const std::string& base_label) { + return [=](XRegister rd, const std::string& target) { + std::string rd_name = GetRegisterName(rd); + std::string temp_name = (rd != Zero) ? 
rd_name : GetRegisterName(TMP); + return base_label + ":\n" + + "auipc " + temp_name + ", %pcrel_hi(" + target + ")\n" + + "jalr " + rd_name + ", %pcrel_lo(" + base_label + "b)(" + temp_name + ")\n"; + }; + } + + template <typename PrintJalRd> + void TestJalRdForward(const std::string& test_name, + size_t gap_size, + const std::string& label_name, + PrintJalRd&& print_jalrd, + bool is_bare = false) { + std::string expected; + Riscv64Label label; + for (XRegister reg : GetRegisters()) { + __ Jal(reg, &label, is_bare); + expected += print_jalrd(reg, label_name + "f"); + } + expected += EmitNops(gap_size); + __ Bind(&label); + expected += label_name + ":\n"; + DriverStr(expected, test_name); + } + + template <typename PrintJalRd> + void TestJalRdBackward(const std::string& test_name, + size_t gap_size, + const std::string& label_name, + PrintJalRd&& print_jalrd, + bool is_bare = false) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += label_name + ":\n"; + expected += EmitNops(gap_size); + for (XRegister reg : GetRegisters()) { + __ Jal(reg, &label, is_bare); + expected += print_jalrd(reg, label_name + "b"); + } + DriverStr(expected, test_name); + } + + auto GetEmitJ(bool is_bare = false) { + return [=](Riscv64Label* label) { __ J(label, is_bare); }; + } + + auto GetEmitJal() { + return [=](Riscv64Label* label) { __ Jal(label); }; + } + + auto GetPrintJ() { + return [=](const std::string& target) { + return "j " + target + "\n"; + }; + } + + auto GetPrintJal() { + return [=](const std::string& target) { + return "jal " + target + "\n"; + }; + } + + auto GetPrintTail(const std::string& base_label) { + return [=](const std::string& target) { + return base_label + ":\n" + + "auipc t6, %pcrel_hi(" + target + ")\n" + + "jalr x0, %pcrel_lo(" + base_label + "b)(t6)\n"; + }; + } + + auto GetPrintCall(const std::string& base_label) { + return [=](const std::string& target) { + return base_label + ":\n" + + "auipc ra, %pcrel_hi(" + target + ")\n" + + "jalr ra, %pcrel_lo(" + base_label + "b)(ra)\n"; + }; + } + + template <typename EmitBuncond, typename PrintBuncond> + void TestBuncondForward(const std::string& test_name, + size_t gap_size, + const std::string& label_name, + EmitBuncond&& emit_buncond, + PrintBuncond&& print_buncond) { + std::string expected; + Riscv64Label label; + emit_buncond(&label); + expected += print_buncond(label_name + "f"); + expected += EmitNops(gap_size); + __ Bind(&label); + expected += label_name + ":\n"; + DriverStr(expected, test_name); + } + + template <typename EmitBuncond, typename PrintBuncond> + void TestBuncondBackward(const std::string& test_name, + size_t gap_size, + const std::string& label_name, + EmitBuncond&& emit_buncond, + PrintBuncond&& print_buncond) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += label_name + ":\n"; + expected += EmitNops(gap_size); + emit_buncond(&label); + expected += print_buncond(label_name + "b"); + DriverStr(expected, test_name); + } + + template <typename EmitOp> + void TestAddConst(const std::string& test_name, + size_t bits, + const std::string& suffix, + EmitOp&& emit_op) { + int64_t kImm12s[] = { + 0, 1, 2, 0xff, 0x100, 0x1ff, 0x200, 0x3ff, 0x400, 0x7ff, + -1, -2, -0x100, -0x101, -0x200, -0x201, -0x400, -0x401, -0x800, + }; + int64_t kSimplePositiveValues[] = { + 0x800, 0x801, 0xbff, 0xc00, 0xff0, 0xff7, 0xff8, 0xffb, 0xffc, 0xffd, 0xffe, + }; + int64_t kSimpleNegativeValues[] = { + -0x801, -0x802, -0xbff, -0xc00, -0xff0, -0xff8, -0xffc, -0xffe, -0xfff, -0x1000, + 
}; + std::vector<int64_t> large_values = CreateImmediateValuesBits(bits, /*as_uint=*/ false); + auto kept_end = std::remove_if(large_values.begin(), + large_values.end(), + [](int64_t value) { return IsInt<13>(value); }); + large_values.erase(kept_end, large_values.end()); + large_values.push_back(0xfff); + + std::string expected; + for (XRegister rd : GetRegisters()) { + std::string rd_name = GetRegisterName(rd); + std::string addi_rd = ART_FORMAT("addi{} {}, ", suffix, rd_name); + std::string add_rd = ART_FORMAT("add{} {}, ", suffix, rd_name); + for (XRegister rs1 : GetRegisters()) { + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(rs1); + srs.ExcludeXRegister(rd); + + std::string rs1_name = GetRegisterName(rs1); + std::string tmp_name = GetRegisterName((rs1 != TMP) ? TMP : TMP2); + std::string addi_tmp = ART_FORMAT("addi{} {}, ", suffix, tmp_name); + + for (int64_t imm : kImm12s) { + emit_op(rd, rs1, imm); + expected += ART_FORMAT("{}{}, {}\n", addi_rd, rs1_name, std::to_string(imm)); + } + + auto emit_simple_ops = [&](ArrayRef<const int64_t> imms, int64_t adjustment) { + for (int64_t imm : imms) { + emit_op(rd, rs1, imm); + expected += ART_FORMAT("{}{}, {}\n", addi_tmp, rs1_name, std::to_string(adjustment)); + expected += + ART_FORMAT("{}{}, {}\n", addi_rd, tmp_name, std::to_string(imm - adjustment)); + } + }; + emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveValues), 0x7ff); + emit_simple_ops(ArrayRef<const int64_t>(kSimpleNegativeValues), -0x800); + + for (int64_t imm : large_values) { + emit_op(rd, rs1, imm); + expected += ART_FORMAT("li {}, {}\n", tmp_name, std::to_string(imm)); + expected += ART_FORMAT("{}{}, {}\n", add_rd, rs1_name, tmp_name); + } + } + } + DriverStr(expected, test_name); + } + + template <typename GetTemp, typename EmitOp> + std::string RepeatLoadStoreArbitraryOffset(const std::string& head, + GetTemp&& get_temp, + EmitOp&& emit_op) { + int64_t kImm12s[] = { + 0, 1, 2, 0xff, 0x100, 0x1ff, 0x200, 0x3ff, 0x400, 0x7ff, + -1, -2, -0x100, -0x101, -0x200, -0x201, -0x400, -0x401, -0x800, + }; + int64_t kSimplePositiveOffsetsAlign8[] = { + 0x800, 0x801, 0xbff, 0xc00, 0xff0, 0xff4, 0xff6, 0xff7 + }; + int64_t kSimplePositiveOffsetsAlign4[] = { + 0xff8, 0xff9, 0xffa, 0xffb + }; + int64_t kSimplePositiveOffsetsAlign2[] = { + 0xffc, 0xffd + }; + int64_t kSimplePositiveOffsetsNoAlign[] = { + 0xffe + }; + int64_t kSimpleNegativeOffsets[] = { + -0x801, -0x802, -0xbff, -0xc00, -0xff0, -0xff8, -0xffc, -0xffe, -0xfff, -0x1000, + }; + int64_t kSplitOffsets[] = { + 0xfff, 0x1000, 0x1001, 0x17ff, 0x1800, 0x1fff, 0x2000, 0x2001, 0x27ff, 0x2800, + 0x7fffe7ff, 0x7fffe800, 0x7fffefff, 0x7ffff000, 0x7ffff001, 0x7ffff7ff, + -0x1001, -0x1002, -0x17ff, -0x1800, -0x1801, -0x2000, -0x2001, -0x2800, -0x2801, + -0x7ffff000, -0x7ffff001, -0x7ffff800, -0x7ffff801, -0x7fffffff, -0x80000000, + }; + int64_t kSpecialOffsets[] = { + 0x7ffff800, 0x7ffff801, 0x7ffffffe, 0x7fffffff + }; + + std::string expected; + for (XRegister rs1 : GetRegisters()) { + XRegister tmp = get_temp(rs1); + if (tmp == kNoXRegister) { + continue; // Unsupported register combination. 
+ }
+ std::string tmp_name = GetRegisterName(tmp);
+ ScratchRegisterScope srs(GetAssembler());
+ srs.ExcludeXRegister(rs1);
+ std::string rs1_name = GetRegisterName(rs1);
+
+ for (int64_t imm : kImm12s) {
+ emit_op(rs1, imm);
+ expected += ART_FORMAT("{}, {}({})\n", head, std::to_string(imm), rs1_name);
+ }
+
+ auto emit_simple_ops = [&](ArrayRef<const int64_t> imms, int64_t adjustment) {
+ for (int64_t imm : imms) {
+ emit_op(rs1, imm);
+ expected +=
+ ART_FORMAT("addi {}, {}, {}\n", tmp_name, rs1_name, std::to_string(adjustment));
+ expected += ART_FORMAT("{}, {}({})\n", head, std::to_string(imm - adjustment), tmp_name);
+ }
+ };
+ emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsAlign8), 0x7f8);
+ emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsAlign4), 0x7fc);
+ emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsAlign2), 0x7fe);
+ emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsNoAlign), 0x7ff);
+ emit_simple_ops(ArrayRef<const int64_t>(kSimpleNegativeOffsets), -0x800);
+
+ for (int64_t imm : kSplitOffsets) {
+ emit_op(rs1, imm);
+ uint32_t imm20 = ((imm >> 12) + ((imm >> 11) & 1)) & 0xfffff;
+ int32_t small_offset = (imm & 0xfff) - ((imm & 0x800) << 1);
+ expected += ART_FORMAT("lui {}, {}\n", tmp_name, std::to_string(imm20));
+ expected += ART_FORMAT("add {}, {}, {}\n", tmp_name, tmp_name, rs1_name);
+ expected += ART_FORMAT("{}, {}({})\n", head, std::to_string(small_offset), tmp_name);
+ }
+
+ for (int64_t imm : kSpecialOffsets) {
+ emit_op(rs1, imm);
+ expected += ART_FORMAT("lui {}, 0x80000\n", tmp_name);
+ expected +=
+ ART_FORMAT("addiw {}, {}, {}\n", tmp_name, tmp_name, std::to_string(imm - 0x80000000));
+ expected += ART_FORMAT("add {}, {}, {}\n", tmp_name, tmp_name, rs1_name);
+ expected += ART_FORMAT("{}, ({})\n", head, tmp_name);
+ }
+ }
+ return expected;
+ }
+
+ void TestLoadStoreArbitraryOffset(const std::string& test_name,
+ const std::string& insn,
+ void (Riscv64Assembler::*fn)(XRegister, XRegister, int32_t),
+ bool is_store) {
+ std::string expected;
+ for (XRegister rd : GetRegisters()) {
+ ScratchRegisterScope srs(GetAssembler());
+ srs.ExcludeXRegister(rd);
+ auto get_temp = [&](XRegister rs1) {
+ if (is_store) {
+ return (rs1 != TMP && rd != TMP)
+ ? TMP
+ : (rs1 != TMP2 && rd != TMP2) ? TMP2 : kNoXRegister;
+ } else {
+ return rs1 != TMP ? TMP : TMP2;
+ }
+ };
+ expected += RepeatLoadStoreArbitraryOffset(
+ insn + " " + GetRegisterName(rd),
+ get_temp,
+ [&](XRegister rs1, int64_t offset) { (GetAssembler()->*fn)(rd, rs1, offset); });
+ }
+ DriverStr(expected, test_name);
+ }
+
+ void TestFPLoadStoreArbitraryOffset(const std::string& test_name,
+ const std::string& insn,
+ void (Riscv64Assembler::*fn)(FRegister, XRegister, int32_t)) {
+ std::string expected;
+ for (FRegister rd : GetFPRegisters()) {
+ expected += RepeatLoadStoreArbitraryOffset(
+ insn + " " + GetFPRegName(rd),
+ [&](XRegister rs1) { return rs1 != TMP ?
TMP : TMP2; }, + [&](XRegister rs1, int64_t offset) { (GetAssembler()->*fn)(rd, rs1, offset); }); + } + DriverStr(expected, test_name); + } + + void TestLoadLiteral(const std::string& test_name, bool with_padding_for_long) { + std::string expected; + Literal* narrow_literal = __ NewLiteral<uint32_t>(0x12345678); + Literal* wide_literal = __ NewLiteral<uint64_t>(0x1234567887654321); + auto print_load = [&](const std::string& load, XRegister rd, const std::string& label) { + std::string rd_name = GetRegisterName(rd); + expected += "1:\n" + "auipc " + rd_name + ", %pcrel_hi(" + label + "f)\n" + + load + " " + rd_name + ", %pcrel_lo(1b)(" + rd_name + ")\n"; + }; + for (XRegister reg : GetRegisters()) { + if (reg != Zero) { + __ Loadw(reg, narrow_literal); + print_load("lw", reg, "2"); + __ Loadwu(reg, narrow_literal); + print_load("lwu", reg, "2"); + __ Loadd(reg, wide_literal); + print_load("ld", reg, "3"); + } + } + std::string tmp = GetRegisterName(TMP); + auto print_fp_load = [&](const std::string& load, FRegister rd, const std::string& label) { + std::string rd_name = GetFPRegName(rd); + expected += "1:\n" + "auipc " + tmp + ", %pcrel_hi(" + label + "f)\n" + + load + " " + rd_name + ", %pcrel_lo(1b)(" + tmp + ")\n"; + }; + for (FRegister freg : GetFPRegisters()) { + __ FLoadw(freg, narrow_literal); + print_fp_load("flw", freg, "2"); + __ FLoadd(freg, wide_literal); + print_fp_load("fld", freg, "3"); + } + // All literal loads above emit 8 bytes of code. The narrow literal shall emit 4 bytes of code. + // If we do not add another instruction, we shall end up with padding before the long literal. + expected += EmitNops(with_padding_for_long ? 0u : sizeof(uint32_t)); + expected += "2:\n" + ".4byte 0x12345678\n" + + std::string(with_padding_for_long ? 
".4byte 0\n" : "") + + "3:\n" + ".8byte 0x1234567887654321\n"; + DriverStr(expected, test_name); + } + + std::string RepeatFFFFRoundingMode( + void (Riscv64Assembler::*f)(FRegister, FRegister, FRegister, FRegister, FPRoundingMode), + const std::string& fmt) { + CHECK(f != nullptr); + std::string str; + for (FRegister reg1 : GetFPRegisters()) { + for (FRegister reg2 : GetFPRegisters()) { + for (FRegister reg3 : GetFPRegisters()) { + for (FRegister reg4 : GetFPRegisters()) { + for (FPRoundingMode rm : kRoundingModes) { + (GetAssembler()->*f)(reg1, reg2, reg3, reg4, rm); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetFPRegName(reg1), &base); + ReplaceReg(REG2_TOKEN, GetFPRegName(reg2), &base); + ReplaceReg(REG3_TOKEN, GetFPRegName(reg3), &base); + ReplaceReg(REG4_TOKEN, GetFPRegName(reg4), &base); + ReplaceRoundingMode(rm, &base); + str += base; + str += "\n"; + } + } + } + } + } + return str; + } + + std::string RepeatFFFRoundingMode( + void (Riscv64Assembler::*f)(FRegister, FRegister, FRegister, FPRoundingMode), + const std::string& fmt) { + CHECK(f != nullptr); + std::string str; + for (FRegister reg1 : GetFPRegisters()) { + for (FRegister reg2 : GetFPRegisters()) { + for (FRegister reg3 : GetFPRegisters()) { + for (FPRoundingMode rm : kRoundingModes) { + (GetAssembler()->*f)(reg1, reg2, reg3, rm); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetFPRegName(reg1), &base); + ReplaceReg(REG2_TOKEN, GetFPRegName(reg2), &base); + ReplaceReg(REG3_TOKEN, GetFPRegName(reg3), &base); + ReplaceRoundingMode(rm, &base); + str += base; + str += "\n"; + } + } + } + } + return str; + } + + template <typename Reg1, typename Reg2> + std::string RepeatTemplatedRegistersRoundingMode( + void (Riscv64Assembler::*f)(Reg1, Reg2, FPRoundingMode), + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + std::string (Base::*GetName1)(const Reg1&), + std::string (Base::*GetName2)(const Reg2&), + const std::string& fmt) { + CHECK(f != nullptr); + std::string str; + for (Reg1 reg1 : reg1_registers) { + for (Reg2 reg2 : reg2_registers) { + for (FPRoundingMode rm : kRoundingModes) { + (GetAssembler()->*f)(reg1, reg2, rm); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceRoundingMode(rm, &base); + str += base; + str += "\n"; + } + } + } + return str; + } + + std::string RepeatFFRoundingMode( + void (Riscv64Assembler::*f)(FRegister, FRegister, FPRoundingMode), + const std::string& fmt) { + return RepeatTemplatedRegistersRoundingMode(f, + GetFPRegisters(), + GetFPRegisters(), + &AssemblerRISCV64Test::GetFPRegName, + &AssemblerRISCV64Test::GetFPRegName, + fmt); + } + + std::string RepeatrFRoundingMode( + void (Riscv64Assembler::*f)(XRegister, FRegister, FPRoundingMode), + const std::string& fmt) { + return RepeatTemplatedRegistersRoundingMode(f, + GetRegisters(), + GetFPRegisters(), + &Base::GetSecondaryRegisterName, + &AssemblerRISCV64Test::GetFPRegName, + fmt); + } + + std::string RepeatFrRoundingMode( + void (Riscv64Assembler::*f)(FRegister, XRegister, FPRoundingMode), + const std::string& fmt) { + return RepeatTemplatedRegistersRoundingMode(f, + GetFPRegisters(), + GetRegisters(), + &AssemblerRISCV64Test::GetFPRegName, + &Base::GetSecondaryRegisterName, + fmt); + } + + template <typename InvalidAqRl> + std::string RepeatRRAqRl(void (Riscv64Assembler::*f)(XRegister, XRegister, AqRl), + const std::string& fmt, + InvalidAqRl&& invalid_aqrl) { + CHECK(f != nullptr); + std::string 
str; + for (XRegister reg1 : GetRegisters()) { + for (XRegister reg2 : GetRegisters()) { + for (AqRl aqrl : kAqRls) { + if (invalid_aqrl(aqrl)) { + continue; + } + (GetAssembler()->*f)(reg1, reg2, aqrl); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetRegisterName(reg1), &base); + ReplaceReg(REG2_TOKEN, GetRegisterName(reg2), &base); + ReplaceAqRl(aqrl, &base); + str += base; + str += "\n"; + } + } + } + return str; + } + + template <typename InvalidAqRl> + std::string RepeatRRRAqRl(void (Riscv64Assembler::*f)(XRegister, XRegister, XRegister, AqRl), + const std::string& fmt, + InvalidAqRl&& invalid_aqrl) { + CHECK(f != nullptr); + std::string str; + for (XRegister reg1 : GetRegisters()) { + for (XRegister reg2 : GetRegisters()) { + for (XRegister reg3 : GetRegisters()) { + for (AqRl aqrl : kAqRls) { + if (invalid_aqrl(aqrl)) { + continue; + } + (GetAssembler()->*f)(reg1, reg2, reg3, aqrl); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetRegisterName(reg1), &base); + ReplaceReg(REG2_TOKEN, GetRegisterName(reg2), &base); + ReplaceReg(REG3_TOKEN, GetRegisterName(reg3), &base); + ReplaceAqRl(aqrl, &base); + str += base; + str += "\n"; + } + } + } + } + return str; + } + + std::string RepeatRRRAqRl(void (Riscv64Assembler::*f)(XRegister, XRegister, XRegister, AqRl), + const std::string& fmt) { + return RepeatRRRAqRl(f, fmt, [](AqRl) { return false; }); + } + + std::string RepeatCsrrX(void (Riscv64Assembler::*f)(XRegister, uint32_t, XRegister), + const std::string& fmt) { + CHECK(f != nullptr); + std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true); + std::string str; + for (XRegister reg1 : GetRegisters()) { + for (int64_t csr : csrs) { + for (XRegister reg2 : GetRegisters()) { + (GetAssembler()->*f)(reg1, dchecked_integral_cast<uint32_t>(csr), reg2); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetRegisterName(reg1), &base); + ReplaceCsrrImm(CSR_TOKEN, csr, &base); + ReplaceReg(REG2_TOKEN, GetRegisterName(reg2), &base); + str += base; + str += "\n"; + } + } + } + return str; + } + + std::string RepeatCsrrXi(void (Riscv64Assembler::*f)(XRegister, uint32_t, uint32_t), + const std::string& fmt) { + CHECK(f != nullptr); + std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true); + std::vector<int64_t> uimms = CreateImmediateValuesBits(2, /*as_uint=*/ true); + std::string str; + for (XRegister reg : GetRegisters()) { + for (int64_t csr : csrs) { + for (int64_t uimm : uimms) { + (GetAssembler()->*f)( + reg, dchecked_integral_cast<uint32_t>(csr), dchecked_integral_cast<uint32_t>(uimm)); + + std::string base = fmt; + ReplaceReg(REG_TOKEN, GetRegisterName(reg), &base); + ReplaceCsrrImm(CSR_TOKEN, csr, &base); + ReplaceCsrrImm(UIMM_TOKEN, uimm, &base); + str += base; + str += "\n"; + } + } + } + return str; + } + + template <typename EmitCssrX> + void TestCsrrXMacro(const std::string& test_name, + const std::string& fmt, + EmitCssrX&& emit_csrrx) { + std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true); + std::string expected; + for (XRegister reg : GetRegisters()) { + for (int64_t csr : csrs) { + emit_csrrx(dchecked_integral_cast<uint32_t>(csr), reg); + + std::string base = fmt; + ReplaceReg(REG_TOKEN, GetRegisterName(reg), &base); + ReplaceCsrrImm(CSR_TOKEN, csr, &base); + expected += base; + expected += "\n"; + } + } + DriverStr(expected, test_name); + } + + template <typename EmitCssrXi> + void TestCsrrXiMacro(const std::string& test_name, + const std::string& fmt, + EmitCssrXi&& emit_csrrxi) { + 
std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true); + std::vector<int64_t> uimms = CreateImmediateValuesBits(2, /*as_uint=*/ true); + std::string expected; + for (int64_t csr : csrs) { + for (int64_t uimm : uimms) { + emit_csrrxi(dchecked_integral_cast<uint32_t>(csr), dchecked_integral_cast<uint32_t>(uimm)); + + std::string base = fmt; + ReplaceCsrrImm(CSR_TOKEN, csr, &base); + ReplaceCsrrImm(UIMM_TOKEN, uimm, &base); + expected += base; + expected += "\n"; + } + } + DriverStr(expected, test_name); + } + + private: + static constexpr const char* RM_TOKEN = "{rm}"; + static constexpr const char* AQRL_TOKEN = "{aqrl}"; + static constexpr const char* CSR_TOKEN = "{csr}"; + static constexpr const char* UIMM_TOKEN = "{uimm}"; + + static constexpr AqRl kAqRls[] = { AqRl::kNone, AqRl::kRelease, AqRl::kAcquire, AqRl::kAqRl }; + + static constexpr FPRoundingMode kRoundingModes[] = { + FPRoundingMode::kRNE, + FPRoundingMode::kRTZ, + FPRoundingMode::kRDN, + FPRoundingMode::kRUP, + FPRoundingMode::kRMM, + FPRoundingMode::kDYN + }; + + void ReplaceRoundingMode(FPRoundingMode rm, /*inout*/ std::string* str) { + const char* replacement; + switch (rm) { + case FPRoundingMode::kRNE: + replacement = "rne"; + break; + case FPRoundingMode::kRTZ: + replacement = "rtz"; + break; + case FPRoundingMode::kRDN: + replacement = "rdn"; + break; + case FPRoundingMode::kRUP: + replacement = "rup"; + break; + case FPRoundingMode::kRMM: + replacement = "rmm"; + break; + case FPRoundingMode::kDYN: + replacement = "dyn"; + break; + default: + LOG(FATAL) << "Unexpected value for rm: " << enum_cast<uint32_t>(rm); + UNREACHABLE(); + } + size_t rm_index = str->find(RM_TOKEN); + EXPECT_NE(rm_index, std::string::npos); + if (rm_index != std::string::npos) { + str->replace(rm_index, ConstexprStrLen(RM_TOKEN), replacement); + } + } + + void ReplaceAqRl(AqRl aqrl, /*inout*/ std::string* str) { + const char* replacement; + switch (aqrl) { + case AqRl::kNone: + replacement = ""; + break; + case AqRl::kRelease: + replacement = ".rl"; + break; + case AqRl::kAcquire: + replacement = ".aq"; + break; + case AqRl::kAqRl: + replacement = ".aqrl"; + break; + default: + LOG(FATAL) << "Unexpected value for `aqrl`: " << enum_cast<uint32_t>(aqrl); + UNREACHABLE(); + } + size_t aqrl_index = str->find(AQRL_TOKEN); + EXPECT_NE(aqrl_index, std::string::npos); + if (aqrl_index != std::string::npos) { + str->replace(aqrl_index, ConstexprStrLen(AQRL_TOKEN), replacement); + } + } + + static void ReplaceCsrrImm(const std::string& imm_token, + int64_t imm, + /*inout*/ std::string* str) { + size_t imm_index = str->find(imm_token); + EXPECT_NE(imm_index, std::string::npos); + if (imm_index != std::string::npos) { + str->replace(imm_index, imm_token.length(), std::to_string(imm)); + } + } + + std::map<XRegister, std::string, RISCV64CpuRegisterCompare> secondary_register_names_; + + std::unique_ptr<const Riscv64InstructionSetFeatures> instruction_set_features_; + bool use_simple_march_ = false; +}; + +TEST_F(AssemblerRISCV64Test, Toolchain) { EXPECT_TRUE(CheckTools()); } + +TEST_F(AssemblerRISCV64Test, Lui) { + DriverStr(RepeatRIb(&Riscv64Assembler::Lui, 20, "lui {reg}, {imm}"), "Lui"); +} + +TEST_F(AssemblerRISCV64Test, Auipc) { + DriverStr(RepeatRIb(&Riscv64Assembler::Auipc, 20, "auipc {reg}, {imm}"), "Auipc"); +} + +TEST_F(AssemblerRISCV64Test, Jal) { + // TODO(riscv64): Change "-19, 2" to "-20, 1" for "C" Standard Extension. 
+ DriverStr(RepeatRIbS(&Riscv64Assembler::Jal, -19, 2, "jal {reg}, {imm}\n"), "Jal"); +} + +TEST_F(AssemblerRISCV64Test, Jalr) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIb(&Riscv64Assembler::Jalr, -12, "jalr {reg1}, {reg2}, {imm}\n"), "Jalr"); +} + +TEST_F(AssemblerRISCV64Test, Beq) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Beq, -11, 2, "beq {reg1}, {reg2}, {imm}\n"), "Beq"); +} + +TEST_F(AssemblerRISCV64Test, Bne) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bne, -11, 2, "bne {reg1}, {reg2}, {imm}\n"), "Bne"); +} + +TEST_F(AssemblerRISCV64Test, Blt) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Blt, -11, 2, "blt {reg1}, {reg2}, {imm}\n"), "Blt"); +} + +TEST_F(AssemblerRISCV64Test, Bge) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bge, -11, 2, "bge {reg1}, {reg2}, {imm}\n"), "Bge"); +} + +TEST_F(AssemblerRISCV64Test, Bltu) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bltu, -11, 2, "bltu {reg1}, {reg2}, {imm}\n"), "Bltu"); +} + +TEST_F(AssemblerRISCV64Test, Bgeu) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bgeu, -11, 2, "bgeu {reg1}, {reg2}, {imm}\n"), "Bgeu"); +} + +TEST_F(AssemblerRISCV64Test, Lb) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lb, -12, "lb {reg1}, {imm}({reg2})"), "Lb"); +} + +TEST_F(AssemblerRISCV64Test, Lh) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lh, -12, "lh {reg1}, {imm}({reg2})"), "Lh"); +} + +TEST_F(AssemblerRISCV64Test, Lw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lw, -12, "lw {reg1}, {imm}({reg2})"), "Lw"); +} + +TEST_F(AssemblerRISCV64Test, Ld) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Ld, -12, "ld {reg1}, {imm}({reg2})"), "Ld"); +} + +TEST_F(AssemblerRISCV64Test, Lbu) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lbu, -12, "lbu {reg1}, {imm}({reg2})"), "Lbu"); +} + +TEST_F(AssemblerRISCV64Test, Lhu) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lhu, -12, "lhu {reg1}, {imm}({reg2})"), "Lhu"); +} + +TEST_F(AssemblerRISCV64Test, Lwu) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lwu, -12, "lwu {reg1}, {imm}({reg2})"), "Lwu"); +} + +TEST_F(AssemblerRISCV64Test, Sb) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sb, -12, "sb {reg1}, {imm}({reg2})"), "Sb"); +} + +TEST_F(AssemblerRISCV64Test, Sh) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sh, -12, "sh {reg1}, {imm}({reg2})"), "Sh"); +} + +TEST_F(AssemblerRISCV64Test, Sw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sw, -12, "sw {reg1}, {imm}({reg2})"), "Sw"); +} + +TEST_F(AssemblerRISCV64Test, Sd) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sd, -12, "sd {reg1}, {imm}({reg2})"), "Sd"); +} + +TEST_F(AssemblerRISCV64Test, Addi) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Addi, -12, "addi {reg1}, {reg2}, {imm}"), "Addi"); +} + +TEST_F(AssemblerRISCV64Test, Slti) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Slti, -12, "slti {reg1}, {reg2}, {imm}"), "Slti"); +} + +TEST_F(AssemblerRISCV64Test, Sltiu) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sltiu, -12, "sltiu {reg1}, {reg2}, {imm}"), "Sltiu"); +} + +TEST_F(AssemblerRISCV64Test, Xori) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Xori, 11, "xori {reg1}, {reg2}, {imm}"), 
"Xori"); +} + +TEST_F(AssemblerRISCV64Test, Ori) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Ori, -12, "ori {reg1}, {reg2}, {imm}"), "Ori"); +} + +TEST_F(AssemblerRISCV64Test, Andi) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Andi, -12, "andi {reg1}, {reg2}, {imm}"), "Andi"); +} + +TEST_F(AssemblerRISCV64Test, Slli) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Slli, 6, "slli {reg1}, {reg2}, {imm}"), "Slli"); +} + +TEST_F(AssemblerRISCV64Test, Srli) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Srli, 6, "srli {reg1}, {reg2}, {imm}"), "Srli"); +} + +TEST_F(AssemblerRISCV64Test, Srai) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Srai, 6, "srai {reg1}, {reg2}, {imm}"), "Srai"); +} + +TEST_F(AssemblerRISCV64Test, Add) { + DriverStr(RepeatRRR(&Riscv64Assembler::Add, "add {reg1}, {reg2}, {reg3}"), "Add"); +} + +TEST_F(AssemblerRISCV64Test, Sub) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sub, "sub {reg1}, {reg2}, {reg3}"), "Sub"); +} + +TEST_F(AssemblerRISCV64Test, Slt) { + DriverStr(RepeatRRR(&Riscv64Assembler::Slt, "slt {reg1}, {reg2}, {reg3}"), "Slt"); +} + +TEST_F(AssemblerRISCV64Test, Sltu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sltu, "sltu {reg1}, {reg2}, {reg3}"), "Sltu"); +} + +TEST_F(AssemblerRISCV64Test, Xor) { + DriverStr(RepeatRRR(&Riscv64Assembler::Xor, "xor {reg1}, {reg2}, {reg3}"), "Xor"); +} + +TEST_F(AssemblerRISCV64Test, Or) { + DriverStr(RepeatRRR(&Riscv64Assembler::Or, "or {reg1}, {reg2}, {reg3}"), "Or"); +} + +TEST_F(AssemblerRISCV64Test, And) { + DriverStr(RepeatRRR(&Riscv64Assembler::And, "and {reg1}, {reg2}, {reg3}"), "And"); +} + +TEST_F(AssemblerRISCV64Test, Sll) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sll, "sll {reg1}, {reg2}, {reg3}"), "Sll"); +} + +TEST_F(AssemblerRISCV64Test, Srl) { + DriverStr(RepeatRRR(&Riscv64Assembler::Srl, "srl {reg1}, {reg2}, {reg3}"), "Srl"); +} + +TEST_F(AssemblerRISCV64Test, Sra) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sra, "sra {reg1}, {reg2}, {reg3}"), "Sra"); +} + +TEST_F(AssemblerRISCV64Test, Addiw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Addiw, -12, "addiw {reg1}, {reg2}, {imm}"), "Addiw"); +} + +TEST_F(AssemblerRISCV64Test, Slliw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Slliw, 5, "slliw {reg1}, {reg2}, {imm}"), "Slliw"); +} + +TEST_F(AssemblerRISCV64Test, Srliw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Srliw, 5, "srliw {reg1}, {reg2}, {imm}"), "Srliw"); +} + +TEST_F(AssemblerRISCV64Test, Sraiw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sraiw, 5, "sraiw {reg1}, {reg2}, {imm}"), "Sraiw"); +} + +TEST_F(AssemblerRISCV64Test, Addw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Addw, "addw {reg1}, {reg2}, {reg3}"), "Addw"); +} + +TEST_F(AssemblerRISCV64Test, Subw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Subw, "subw {reg1}, {reg2}, {reg3}"), "Subw"); +} + +TEST_F(AssemblerRISCV64Test, Sllw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sllw, "sllw {reg1}, {reg2}, {reg3}"), "Sllw"); +} + +TEST_F(AssemblerRISCV64Test, Srlw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Srlw, "srlw {reg1}, {reg2}, {reg3}"), "Srlw"); +} + +TEST_F(AssemblerRISCV64Test, Sraw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sraw, "sraw {reg1}, {reg2}, {reg3}"), "Sraw"); +} + +TEST_F(AssemblerRISCV64Test, Ecall) { + __ Ecall(); + DriverStr("ecall\n", "Ecall"); +} + +TEST_F(AssemblerRISCV64Test, Ebreak) { + __ Ebreak(); + DriverStr("ebreak\n", "Ebreak"); +} + +TEST_F(AssemblerRISCV64Test, Fence) { + auto get_fence_type_string = [](uint32_t fence_type) { + CHECK_LE(fence_type, 0xfu); + std::string result; + if ((fence_type & kFenceInput) != 0u) { + result += 
"i"; + } + if ((fence_type & kFenceOutput) != 0u) { + result += "o"; + } + if ((fence_type & kFenceRead) != 0u) { + result += "r"; + } + if ((fence_type & kFenceWrite) != 0u) { + result += "w"; + } + if (result.empty()) { + result += "0"; + } + return result; + }; + + std::string expected; + // Note: The `pred` and `succ` are 4 bits each. + // Some combinations are not really useful but the assembler can emit them all. + for (uint32_t pred = 0u; pred != 0x10; ++pred) { + for (uint32_t succ = 0u; succ != 0x10; ++succ) { + __ Fence(pred, succ); + expected += + "fence " + get_fence_type_string(pred) + ", " + get_fence_type_string(succ) + "\n"; + } + } + DriverStr(expected, "Fence"); +} + +TEST_F(AssemblerRISCV64Test, FenceTso) { + __ FenceTso(); + DriverStr("fence.tso", "FenceTso"); +} + +TEST_F(AssemblerRISCV64Test, FenceI) { + __ FenceI(); + DriverStr("fence.i", "FenceI"); +} + +TEST_F(AssemblerRISCV64Test, Mul) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mul, "mul {reg1}, {reg2}, {reg3}"), "Mul"); +} + +TEST_F(AssemblerRISCV64Test, Mulh) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mulh, "mulh {reg1}, {reg2}, {reg3}"), "Mulh"); +} + +TEST_F(AssemblerRISCV64Test, Mulhsu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mulhsu, "mulhsu {reg1}, {reg2}, {reg3}"), "Mulhsu"); +} + +TEST_F(AssemblerRISCV64Test, Mulhu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mulhu, "mulhu {reg1}, {reg2}, {reg3}"), "Mulhu"); +} + +TEST_F(AssemblerRISCV64Test, Div) { + DriverStr(RepeatRRR(&Riscv64Assembler::Div, "div {reg1}, {reg2}, {reg3}"), "Div"); +} + +TEST_F(AssemblerRISCV64Test, Divu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Divu, "divu {reg1}, {reg2}, {reg3}"), "Divu"); +} + +TEST_F(AssemblerRISCV64Test, Rem) { + DriverStr(RepeatRRR(&Riscv64Assembler::Rem, "rem {reg1}, {reg2}, {reg3}"), "Rem"); +} + +TEST_F(AssemblerRISCV64Test, Remu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Remu, "remu {reg1}, {reg2}, {reg3}"), "Remu"); +} + +TEST_F(AssemblerRISCV64Test, Mulw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mulw, "mulw {reg1}, {reg2}, {reg3}"), "Mulw"); +} + +TEST_F(AssemblerRISCV64Test, Divw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Divw, "divw {reg1}, {reg2}, {reg3}"), "Divw"); +} + +TEST_F(AssemblerRISCV64Test, Divuw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Divuw, "divuw {reg1}, {reg2}, {reg3}"), "Divuw"); +} + +TEST_F(AssemblerRISCV64Test, Remw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Remw, "remw {reg1}, {reg2}, {reg3}"), "Remw"); +} + +TEST_F(AssemblerRISCV64Test, Remuw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Remuw, "remuw {reg1}, {reg2}, {reg3}"), "Remuw"); +} + +TEST_F(AssemblerRISCV64Test, LrW) { + auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kRelease; }; + DriverStr(RepeatRRAqRl(&Riscv64Assembler::LrW, "lr.w{aqrl} {reg1}, ({reg2})", invalid_aqrl), + "LrW"); +} + +TEST_F(AssemblerRISCV64Test, LrD) { + auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kRelease; }; + DriverStr(RepeatRRAqRl(&Riscv64Assembler::LrD, "lr.d{aqrl} {reg1}, ({reg2})", invalid_aqrl), + "LrD"); +} + +TEST_F(AssemblerRISCV64Test, ScW) { + auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kAcquire; }; + DriverStr( + RepeatRRRAqRl(&Riscv64Assembler::ScW, "sc.w{aqrl} {reg1}, {reg2}, ({reg3})", invalid_aqrl), + "ScW"); +} + +TEST_F(AssemblerRISCV64Test, ScD) { + auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kAcquire; }; + DriverStr( + RepeatRRRAqRl(&Riscv64Assembler::ScD, "sc.d{aqrl} {reg1}, {reg2}, ({reg3})", invalid_aqrl), + "ScD"); +} + +TEST_F(AssemblerRISCV64Test, AmoSwapW) { + 
DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoSwapW, "amoswap.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoSwapW"); +} + +TEST_F(AssemblerRISCV64Test, AmoSwapD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoSwapD, "amoswap.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoSwapD"); +} + +TEST_F(AssemblerRISCV64Test, AmoAddW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAddW, "amoadd.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoAddW"); +} + +TEST_F(AssemblerRISCV64Test, AmoAddD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAddD, "amoadd.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoAddD"); +} + +TEST_F(AssemblerRISCV64Test, AmoXorW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoXorW, "amoxor.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoXorW"); +} + +TEST_F(AssemblerRISCV64Test, AmoXorD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoXorD, "amoxor.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoXorD"); +} + +TEST_F(AssemblerRISCV64Test, AmoAndW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAndW, "amoand.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoAndW"); +} + +TEST_F(AssemblerRISCV64Test, AmoAndD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAndD, "amoand.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoAndD"); +} + +TEST_F(AssemblerRISCV64Test, AmoOrW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoOrW, "amoor.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoOrW"); +} + +TEST_F(AssemblerRISCV64Test, AmoOrD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoOrD, "amoor.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoOrD"); +} + +TEST_F(AssemblerRISCV64Test, AmoMinW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinW, "amomin.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMinW"); +} + +TEST_F(AssemblerRISCV64Test, AmoMinD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinD, "amomin.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMinD"); +} + +TEST_F(AssemblerRISCV64Test, AmoMaxW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxW, "amomax.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMaxW"); +} + +TEST_F(AssemblerRISCV64Test, AmoMaxD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxD, "amomax.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMaxD"); +} + +TEST_F(AssemblerRISCV64Test, AmoMinuW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinuW, "amominu.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMinuW"); +} + +TEST_F(AssemblerRISCV64Test, AmoMinuD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinuD, "amominu.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMinuD"); +} + +TEST_F(AssemblerRISCV64Test, AmoMaxuW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxuW, "amomaxu.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMaxuW"); +} + +TEST_F(AssemblerRISCV64Test, AmoMaxuD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxuD, "amomaxu.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMaxuD"); +} + +TEST_F(AssemblerRISCV64Test, Csrrw) { + DriverStr(RepeatCsrrX(&Riscv64Assembler::Csrrw, "csrrw {reg1}, {csr}, {reg2}"), "Csrrw"); +} + +TEST_F(AssemblerRISCV64Test, Csrrs) { + DriverStr(RepeatCsrrX(&Riscv64Assembler::Csrrs, "csrrs {reg1}, {csr}, {reg2}"), "Csrrs"); +} + +TEST_F(AssemblerRISCV64Test, Csrrc) { + DriverStr(RepeatCsrrX(&Riscv64Assembler::Csrrc, "csrrc {reg1}, {csr}, {reg2}"), "Csrrc"); +} + +TEST_F(AssemblerRISCV64Test, Csrrwi) { + DriverStr(RepeatCsrrXi(&Riscv64Assembler::Csrrwi, "csrrwi {reg}, {csr}, {uimm}"), "Csrrwi"); +} + +TEST_F(AssemblerRISCV64Test, Csrrsi) { + DriverStr(RepeatCsrrXi(&Riscv64Assembler::Csrrsi, "csrrsi {reg}, {csr}, {uimm}"), "Csrrsi"); +} + +TEST_F(AssemblerRISCV64Test, Csrrci) { + 
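+  // The CSR immediate instructions encode their operand as a 5-bit zero-extended
+  // value (zimm) in the rs1 field, so a sketch of the precondition presumably
+  // checked by the assembler (using the IsUint helper from base/bit_utils.h) is:
+  //   DCHECK(IsUint<5>(uimm));
+  // RepeatCsrrXi() only samples 2-bit values, which stays well inside that range.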
DriverStr(RepeatCsrrXi(&Riscv64Assembler::Csrrci, "csrrci {reg}, {csr}, {uimm}"), "Csrrci");
+}
+
+TEST_F(AssemblerRISCV64Test, FLw) {
+  DriverStr(RepeatFRIb(&Riscv64Assembler::FLw, -12, "flw {reg1}, {imm}({reg2})"), "FLw");
+}
+
+TEST_F(AssemblerRISCV64Test, FLd) {
+  DriverStr(RepeatFRIb(&Riscv64Assembler::FLd, -12, "fld {reg1}, {imm}({reg2})"), "FLd");
+}
+
+TEST_F(AssemblerRISCV64Test, FSw) {
+  DriverStr(RepeatFRIb(&Riscv64Assembler::FSw, 2, "fsw {reg1}, {imm}({reg2})"), "FSw");
+}
+
+TEST_F(AssemblerRISCV64Test, FSd) {
+  DriverStr(RepeatFRIb(&Riscv64Assembler::FSd, 2, "fsd {reg1}, {imm}({reg2})"), "FSd");
+}
+
+TEST_F(AssemblerRISCV64Test, FMAddS) {
+  DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMAddS,
+                                   "fmadd.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMAddS");
+}
+
+TEST_F(AssemblerRISCV64Test, FMAddS_Default) {
+  DriverStr(RepeatFFFF(&Riscv64Assembler::FMAddS, "fmadd.s {reg1}, {reg2}, {reg3}, {reg4}"),
+            "FMAddS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FMAddD) {
+  DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMAddD,
+                                   "fmadd.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMAddD");
+}
+
+TEST_F(AssemblerRISCV64Test, FMAddD_Default) {
+  DriverStr(RepeatFFFF(&Riscv64Assembler::FMAddD, "fmadd.d {reg1}, {reg2}, {reg3}, {reg4}"),
+            "FMAddD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FMSubS) {
+  DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMSubS,
+                                   "fmsub.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMSubS");
+}
+
+TEST_F(AssemblerRISCV64Test, FMSubS_Default) {
+  DriverStr(RepeatFFFF(&Riscv64Assembler::FMSubS, "fmsub.s {reg1}, {reg2}, {reg3}, {reg4}"),
+            "FMSubS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FMSubD) {
+  DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMSubD,
+                                   "fmsub.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMSubD");
+}
+
+TEST_F(AssemblerRISCV64Test, FMSubD_Default) {
+  DriverStr(RepeatFFFF(&Riscv64Assembler::FMSubD, "fmsub.d {reg1}, {reg2}, {reg3}, {reg4}"),
+            "FMSubD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMSubS) {
+  DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMSubS,
+                                   "fnmsub.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMSubS");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMSubS_Default) {
+  DriverStr(RepeatFFFF(&Riscv64Assembler::FNMSubS, "fnmsub.s {reg1}, {reg2}, {reg3}, {reg4}"),
+            "FNMSubS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMSubD) {
+  DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMSubD,
+                                   "fnmsub.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMSubD");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMSubD_Default) {
+  DriverStr(RepeatFFFF(&Riscv64Assembler::FNMSubD, "fnmsub.d {reg1}, {reg2}, {reg3}, {reg4}"),
+            "FNMSubD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMAddS) {
+  DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMAddS,
+                                   "fnmadd.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMAddS");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMAddS_Default) {
+  DriverStr(RepeatFFFF(&Riscv64Assembler::FNMAddS, "fnmadd.s {reg1}, {reg2}, {reg3}, {reg4}"),
+            "FNMAddS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMAddD) {
+  DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMAddD,
+                                   "fnmadd.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMAddD");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMAddD_Default) {
+  DriverStr(RepeatFFFF(&Riscv64Assembler::FNMAddD, "fnmadd.d {reg1}, {reg2}, {reg3}, {reg4}"),
+            "FNMAddD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FAddS) {
+  DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FAddS, "fadd.s {reg1}, {reg2}, {reg3}, {rm}"),
+            "FAddS");
+}
+
+TEST_F(AssemblerRISCV64Test, FAddS_Default) {
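+  // The {rm} token in the test above cycles through rne/rtz/rdn/rup/rmm/dyn per
+  // kRoundingModes, while these "_Default" variants omit the argument; presumably
+  // the assembler then encodes rm = dyn (0b111), which disassembles without an
+  // explicit rounding-mode operand.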
DriverStr(RepeatFFF(&Riscv64Assembler::FAddS, "fadd.s {reg1}, {reg2}, {reg3}"), "FAddS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FAddD) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FAddD, "fadd.d {reg1}, {reg2}, {reg3}, {rm}"), + "FAddD"); +} + +TEST_F(AssemblerRISCV64Test, FAddD_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FAddD, "fadd.d {reg1}, {reg2}, {reg3}"), "FAddD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSubS) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FSubS, "fsub.s {reg1}, {reg2}, {reg3}, {rm}"), + "FSubS"); +} + +TEST_F(AssemblerRISCV64Test, FSubS_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSubS, "fsub.s {reg1}, {reg2}, {reg3}"), "FSubS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSubD) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FSubD, "fsub.d {reg1}, {reg2}, {reg3}, {rm}"), + "FSubD"); +} + +TEST_F(AssemblerRISCV64Test, FSubD_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSubD, "fsub.d {reg1}, {reg2}, {reg3}"), "FSubD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMulS) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FMulS, "fmul.s {reg1}, {reg2}, {reg3}, {rm}"), + "FMulS"); +} + +TEST_F(AssemblerRISCV64Test, FMulS_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMulS, "fmul.s {reg1}, {reg2}, {reg3}"), "FMulS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMulD) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FMulD, "fmul.d {reg1}, {reg2}, {reg3}, {rm}"), + "FMulD"); +} + +TEST_F(AssemblerRISCV64Test, FMulD_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMulD, "fmul.d {reg1}, {reg2}, {reg3}"), "FMulD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FDivS) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FDivS, "fdiv.s {reg1}, {reg2}, {reg3}, {rm}"), + "FDivS"); +} + +TEST_F(AssemblerRISCV64Test, FDivS_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FDivS, "fdiv.s {reg1}, {reg2}, {reg3}"), "FDivS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FDivD) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FDivD, "fdiv.d {reg1}, {reg2}, {reg3}, {rm}"), + "FDivD"); +} + +TEST_F(AssemblerRISCV64Test, FDivD_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FDivD, "fdiv.d {reg1}, {reg2}, {reg3}"), "FDivD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSqrtS) { + DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FSqrtS, "fsqrt.s {reg1}, {reg2}, {rm}"), + "FSqrtS"); +} + +TEST_F(AssemblerRISCV64Test, FSqrtS_Default) { + DriverStr(RepeatFF(&Riscv64Assembler::FSqrtS, "fsqrt.s {reg1}, {reg2}"), "FSqrtS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSqrtD) { + DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FSqrtD, "fsqrt.d {reg1}, {reg2}, {rm}"), + "FSqrtD"); +} + +TEST_F(AssemblerRISCV64Test, FSqrtD_Default) { + DriverStr(RepeatFF(&Riscv64Assembler::FSqrtD, "fsqrt.d {reg1}, {reg2}"), "FSqrtD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjS, "fsgnj.s {reg1}, {reg2}, {reg3}"), "FSgnjS"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjD, "fsgnj.d {reg1}, {reg2}, {reg3}"), "FSgnjD"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjnS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjnS, "fsgnjn.s {reg1}, {reg2}, {reg3}"), "FSgnjnS"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjnD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjnD, "fsgnjn.d {reg1}, {reg2}, {reg3}"), "FSgnjnD"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjxS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjxS, "fsgnjx.s {reg1}, {reg2}, {reg3}"), 
"FSgnjxS"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjxD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjxD, "fsgnjx.d {reg1}, {reg2}, {reg3}"), "FSgnjxD"); +} + +TEST_F(AssemblerRISCV64Test, FMinS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMinS, "fmin.s {reg1}, {reg2}, {reg3}"), "FMinS"); +} + +TEST_F(AssemblerRISCV64Test, FMinD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMinD, "fmin.d {reg1}, {reg2}, {reg3}"), "FMinD"); +} + +TEST_F(AssemblerRISCV64Test, FMaxS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMaxS, "fmax.s {reg1}, {reg2}, {reg3}"), "FMaxS"); +} + +TEST_F(AssemblerRISCV64Test, FMaxD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMaxD, "fmax.d {reg1}, {reg2}, {reg3}"), "FMaxD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSD) { + DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FCvtSD, "fcvt.s.d {reg1}, {reg2}, {rm}"), + "FCvtSD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSD_Default) { + DriverStr(RepeatFF(&Riscv64Assembler::FCvtSD, "fcvt.s.d {reg1}, {reg2}"), "FCvtSD_Default"); +} + +// This conversion is lossless, so the rounding mode is meaningless and the assembler we're +// testing against does not even accept the rounding mode argument, so this test is disabled. +TEST_F(AssemblerRISCV64Test, DISABLED_FCvtDS) { + DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FCvtDS, "fcvt.d.s {reg1}, {reg2}, {rm}"), + "FCvtDS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDS_Default) { + DriverStr(RepeatFF(&Riscv64Assembler::FCvtDS, "fcvt.d.s {reg1}, {reg2}"), "FCvtDS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FEqS) { + DriverStr(RepeatRFF(&Riscv64Assembler::FEqS, "feq.s {reg1}, {reg2}, {reg3}"), "FEqS"); +} + +TEST_F(AssemblerRISCV64Test, FEqD) { + DriverStr(RepeatRFF(&Riscv64Assembler::FEqD, "feq.d {reg1}, {reg2}, {reg3}"), "FEqD"); +} + +TEST_F(AssemblerRISCV64Test, FLtS) { + DriverStr(RepeatRFF(&Riscv64Assembler::FLtS, "flt.s {reg1}, {reg2}, {reg3}"), "FLtS"); +} + +TEST_F(AssemblerRISCV64Test, FLtD) { + DriverStr(RepeatRFF(&Riscv64Assembler::FLtD, "flt.d {reg1}, {reg2}, {reg3}"), "FLtD"); +} + +TEST_F(AssemblerRISCV64Test, FLeS) { + DriverStr(RepeatRFF(&Riscv64Assembler::FLeS, "fle.s {reg1}, {reg2}, {reg3}"), "FLeS"); +} + +TEST_F(AssemblerRISCV64Test, FLeD) { + DriverStr(RepeatRFF(&Riscv64Assembler::FLeD, "fle.d {reg1}, {reg2}, {reg3}"), "FLeD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWS) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWS, "fcvt.w.s {reg1}, {reg2}, {rm}"), + "FCvtWS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWS_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtWS, "fcvt.w.s {reg1}, {reg2}"), "FCvtWS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWD) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWD, "fcvt.w.d {reg1}, {reg2}, {rm}"), + "FCvtWD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWD_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtWD, "fcvt.w.d {reg1}, {reg2}"), "FCvtWD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWuS) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWuS, "fcvt.wu.s {reg1}, {reg2}, {rm}"), + "FCvtWuS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWuS_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtWuS, "fcvt.wu.s {reg1}, {reg2}"), "FCvtWuS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWuD) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWuD, "fcvt.wu.d {reg1}, {reg2}, {rm}"), + "FCvtWuD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWuD_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtWuD, "fcvt.wu.d {reg1}, {reg2}"), "FCvtWuD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLS) { + 
DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLS, "fcvt.l.s {reg1}, {reg2}, {rm}"), + "FCvtLS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLS_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtLS, "fcvt.l.s {reg1}, {reg2}"), "FCvtLS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLD) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLD, "fcvt.l.d {reg1}, {reg2}, {rm}"), + "FCvtLD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLD_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtLD, "fcvt.l.d {reg1}, {reg2}"), "FCvtLD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLuS) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLuS, "fcvt.lu.s {reg1}, {reg2}, {rm}"), + "FCvtLuS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLuS_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtLuS, "fcvt.lu.s {reg1}, {reg2}"), "FCvtLuS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLuD) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLuD, "fcvt.lu.d {reg1}, {reg2}, {rm}"), + "FCvtLuD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLuD_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtLuD, "fcvt.lu.d {reg1}, {reg2}"), "FCvtLuD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSW) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSW, "fcvt.s.w {reg1}, {reg2}, {rm}"), + "FCvtSW"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSW_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtSW, "fcvt.s.w {reg1}, {reg2}"), "FCvtSW_Default"); +} + +// This conversion is lossless, so the rounding mode is meaningless and the assembler we're +// testing against does not even accept the rounding mode argument, so this test is disabled. +TEST_F(AssemblerRISCV64Test, DISABLED_FCvtDW) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDW, "fcvt.d.w {reg1}, {reg2}, {rm}"), + "FCvtDW"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDW_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtDW, "fcvt.d.w {reg1}, {reg2}"), "FCvtDW_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSWu) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSWu, "fcvt.s.wu {reg1}, {reg2}, {rm}"), + "FCvtSWu"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSWu_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtSWu, "fcvt.s.wu {reg1}, {reg2}"), "FCvtSWu_Default"); +} + +// This conversion is lossless, so the rounding mode is meaningless and the assembler we're +// testing against does not even accept the rounding mode argument, so this test is disabled. 
+TEST_F(AssemblerRISCV64Test, DISABLED_FCvtDWu) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDWu, "fcvt.d.wu {reg1}, {reg2}, {rm}"), + "FCvtDWu"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDWu_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtDWu, "fcvt.d.wu {reg1}, {reg2}"), "FCvtDWu_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSL) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSL, "fcvt.s.l {reg1}, {reg2}, {rm}"), + "FCvtSL"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSL_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtSL, "fcvt.s.l {reg1}, {reg2}"), "FCvtSL_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDL) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDL, "fcvt.d.l {reg1}, {reg2}, {rm}"), + "FCvtDL"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDL_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtDL, "fcvt.d.l {reg1}, {reg2}"), "FCvtDL_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSLu) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSLu, "fcvt.s.lu {reg1}, {reg2}, {rm}"), + "FCvtSLu"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSLu_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtSLu, "fcvt.s.lu {reg1}, {reg2}"), "FCvtSLu_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDLu) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDLu, "fcvt.d.lu {reg1}, {reg2}, {rm}"), + "FCvtDLu"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDLu_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtDLu, "fcvt.d.lu {reg1}, {reg2}"), "FCvtDLu_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMvXW) { + DriverStr(RepeatRF(&Riscv64Assembler::FMvXW, "fmv.x.w {reg1}, {reg2}"), "FMvXW"); +} + +TEST_F(AssemblerRISCV64Test, FMvXD) { + DriverStr(RepeatRF(&Riscv64Assembler::FMvXD, "fmv.x.d {reg1}, {reg2}"), "FMvXD"); +} + +TEST_F(AssemblerRISCV64Test, FMvWX) { + DriverStr(RepeatFR(&Riscv64Assembler::FMvWX, "fmv.w.x {reg1}, {reg2}"), "FMvWX"); +} + +TEST_F(AssemblerRISCV64Test, FMvDX) { + DriverStr(RepeatFR(&Riscv64Assembler::FMvDX, "fmv.d.x {reg1}, {reg2}"), "FMvDX"); +} + +TEST_F(AssemblerRISCV64Test, FClassS) { + DriverStr(RepeatRF(&Riscv64Assembler::FClassS, "fclass.s {reg1}, {reg2}"), "FClassS"); +} + +TEST_F(AssemblerRISCV64Test, FClassD) { + DriverStr(RepeatrF(&Riscv64Assembler::FClassD, "fclass.d {reg1}, {reg2}"), "FClassD"); +} + +TEST_F(AssemblerRISCV64Test, AddUw) { + DriverStr(RepeatRRR(&Riscv64Assembler::AddUw, "add.uw {reg1}, {reg2}, {reg3}"), "AddUw"); +} + +TEST_F(AssemblerRISCV64Test, Sh1Add) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh1Add, "sh1add {reg1}, {reg2}, {reg3}"), "Sh1Add"); +} + +TEST_F(AssemblerRISCV64Test, Sh1AddUw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh1AddUw, "sh1add.uw {reg1}, {reg2}, {reg3}"), "Sh1AddUw"); +} + +TEST_F(AssemblerRISCV64Test, Sh2Add) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh2Add, "sh2add {reg1}, {reg2}, {reg3}"), "Sh2Add"); +} + +TEST_F(AssemblerRISCV64Test, Sh2AddUw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh2AddUw, "sh2add.uw {reg1}, {reg2}, {reg3}"), "Sh2AddUw"); +} + +TEST_F(AssemblerRISCV64Test, Sh3Add) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh3Add, "sh3add {reg1}, {reg2}, {reg3}"), "Sh3Add"); +} + +TEST_F(AssemblerRISCV64Test, Sh3AddUw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh3AddUw, "sh3add.uw {reg1}, {reg2}, {reg3}"), "Sh3AddUw"); +} + +TEST_F(AssemblerRISCV64Test, SlliUw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::SlliUw, 6, "slli.uw {reg1}, {reg2}, {imm}"), "SlliUw"); +} + +TEST_F(AssemblerRISCV64Test, Andn) { + DriverStr(RepeatRRR(&Riscv64Assembler::Andn, "andn {reg1}, 
{reg2}, {reg3}"), "Andn");
+}
+
+TEST_F(AssemblerRISCV64Test, Orn) {
+  DriverStr(RepeatRRR(&Riscv64Assembler::Orn, "orn {reg1}, {reg2}, {reg3}"), "Orn");
+}
+
+TEST_F(AssemblerRISCV64Test, Xnor) {
+  DriverStr(RepeatRRR(&Riscv64Assembler::Xnor, "xnor {reg1}, {reg2}, {reg3}"), "Xnor");
+}
+
+TEST_F(AssemblerRISCV64Test, Clz) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Clz, "clz {reg1}, {reg2}"), "Clz");
+}
+
+TEST_F(AssemblerRISCV64Test, Clzw) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Clzw, "clzw {reg1}, {reg2}"), "Clzw");
+}
+
+TEST_F(AssemblerRISCV64Test, Ctz) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Ctz, "ctz {reg1}, {reg2}"), "Ctz");
+}
+
+TEST_F(AssemblerRISCV64Test, Ctzw) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Ctzw, "ctzw {reg1}, {reg2}"), "Ctzw");
+}
+
+TEST_F(AssemblerRISCV64Test, Cpop) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Cpop, "cpop {reg1}, {reg2}"), "Cpop");
+}
+
+TEST_F(AssemblerRISCV64Test, Cpopw) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Cpopw, "cpopw {reg1}, {reg2}"), "Cpopw");
+}
+
+TEST_F(AssemblerRISCV64Test, Min) {
+  DriverStr(RepeatRRR(&Riscv64Assembler::Min, "min {reg1}, {reg2}, {reg3}"), "Min");
+}
+
+TEST_F(AssemblerRISCV64Test, Minu) {
+  DriverStr(RepeatRRR(&Riscv64Assembler::Minu, "minu {reg1}, {reg2}, {reg3}"), "Minu");
+}
+
+TEST_F(AssemblerRISCV64Test, Max) {
+  DriverStr(RepeatRRR(&Riscv64Assembler::Max, "max {reg1}, {reg2}, {reg3}"), "Max");
+}
+
+TEST_F(AssemblerRISCV64Test, Maxu) {
+  DriverStr(RepeatRRR(&Riscv64Assembler::Maxu, "maxu {reg1}, {reg2}, {reg3}"), "Maxu");
+}
+
+TEST_F(AssemblerRISCV64Test, Rol) {
+  DriverStr(RepeatRRR(&Riscv64Assembler::Rol, "rol {reg1}, {reg2}, {reg3}"), "Rol");
+}
+
+TEST_F(AssemblerRISCV64Test, Rolw) {
+  DriverStr(RepeatRRR(&Riscv64Assembler::Rolw, "rolw {reg1}, {reg2}, {reg3}"), "Rolw");
+}
+
+TEST_F(AssemblerRISCV64Test, Ror) {
+  DriverStr(RepeatRRR(&Riscv64Assembler::Ror, "ror {reg1}, {reg2}, {reg3}"), "Ror");
+}
+
+TEST_F(AssemblerRISCV64Test, Rorw) {
+  DriverStr(RepeatRRR(&Riscv64Assembler::Rorw, "rorw {reg1}, {reg2}, {reg3}"), "Rorw");
+}
+
+TEST_F(AssemblerRISCV64Test, Rori) {
+  DriverStr(RepeatRRIb(&Riscv64Assembler::Rori, 6, "rori {reg1}, {reg2}, {imm}"), "Rori");
+}
+
+TEST_F(AssemblerRISCV64Test, Roriw) {
+  DriverStr(RepeatRRIb(&Riscv64Assembler::Roriw, 5, "roriw {reg1}, {reg2}, {imm}"), "Roriw");
+}
+
+TEST_F(AssemblerRISCV64Test, OrcB) {
+  DriverStr(RepeatRR(&Riscv64Assembler::OrcB, "orc.b {reg1}, {reg2}"), "OrcB");
+}
+
+TEST_F(AssemblerRISCV64Test, Rev8) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Rev8, "rev8 {reg1}, {reg2}"), "Rev8");
+}
+
+// Pseudo instructions.
+TEST_F(AssemblerRISCV64Test, Nop) {
+  __ Nop();
+  DriverStr("addi zero,zero,0", "Nop");
+}
+
+TEST_F(AssemblerRISCV64Test, Li) {
+  SetUseSimpleMarch(true);
+  TestLoadConst64("Li",
+                  /*can_use_tmp=*/ false,
+                  [&](XRegister rd, int64_t value) { __ Li(rd, value); });
+}
+
+TEST_F(AssemblerRISCV64Test, Mv) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Mv, "addi {reg1}, {reg2}, 0"), "Mv");
+}
+
+TEST_F(AssemblerRISCV64Test, Not) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Not, "xori {reg1}, {reg2}, -1"), "Not");
+}
+
+TEST_F(AssemblerRISCV64Test, Neg) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Neg, "sub {reg1}, x0, {reg2}"), "Neg");
+}
+
+TEST_F(AssemblerRISCV64Test, NegW) {
+  DriverStr(RepeatRR(&Riscv64Assembler::NegW, "subw {reg1}, x0, {reg2}"), "NegW");
+}
+
+TEST_F(AssemblerRISCV64Test, SextB) {
+  // Note: SEXT.B from the Zbb extension is not supported.
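+  // (The expected expansion below: shifting left and then arithmetic-right by 56
+  // keeps only the low byte and replicates its sign bit across the upper 56 bits.)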
+  DriverStr(RepeatRR(&Riscv64Assembler::SextB,
+                     "slli {reg1}, {reg2}, 56\n"
+                     "srai {reg1}, {reg1}, 56"),
+            "SextB");
+}
+
+TEST_F(AssemblerRISCV64Test, SextH) {
+  // Note: SEXT.H from the Zbb extension is not supported.
+  DriverStr(RepeatRR(&Riscv64Assembler::SextH,
+                     "slli {reg1}, {reg2}, 48\n"
+                     "srai {reg1}, {reg1}, 48"),
+            "SextH");
+}
+
+TEST_F(AssemblerRISCV64Test, SextW) {
+  DriverStr(RepeatRR(&Riscv64Assembler::SextW, "addiw {reg1}, {reg2}, 0\n"), "SextW");
+}
+
+TEST_F(AssemblerRISCV64Test, ZextB) {
+  DriverStr(RepeatRR(&Riscv64Assembler::ZextB, "andi {reg1}, {reg2}, 255"), "ZextB");
+}
+
+TEST_F(AssemblerRISCV64Test, ZextH) {
+  // Note: ZEXT.H from the Zbb extension is not supported.
+  DriverStr(RepeatRR(&Riscv64Assembler::ZextH,
+                     "slli {reg1}, {reg2}, 48\n"
+                     "srli {reg1}, {reg1}, 48"),
+            "ZextH");
+}
+
+TEST_F(AssemblerRISCV64Test, ZextW) {
+  DriverStr(RepeatRR(&Riscv64Assembler::ZextW,
+                     "slli {reg1}, {reg2}, 32\n"
+                     "srli {reg1}, {reg1}, 32"),
+            "ZextW");
+}
+
+TEST_F(AssemblerRISCV64Test, Seqz) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Seqz, "sltiu {reg1}, {reg2}, 1\n"), "Seqz");
+}
+
+TEST_F(AssemblerRISCV64Test, Snez) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Snez, "sltu {reg1}, zero, {reg2}\n"), "Snez");
+}
+
+TEST_F(AssemblerRISCV64Test, Sltz) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Sltz, "slt {reg1}, {reg2}, zero\n"), "Sltz");
+}
+
+TEST_F(AssemblerRISCV64Test, Sgtz) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Sgtz, "slt {reg1}, zero, {reg2}\n"), "Sgtz");
+}
+
+TEST_F(AssemblerRISCV64Test, FMvS) {
+  DriverStr(RepeatFF(&Riscv64Assembler::FMvS, "fsgnj.s {reg1}, {reg2}, {reg2}\n"), "FMvS");
+}
+
+TEST_F(AssemblerRISCV64Test, FAbsS) {
+  DriverStr(RepeatFF(&Riscv64Assembler::FAbsS, "fsgnjx.s {reg1}, {reg2}, {reg2}\n"), "FAbsS");
+}
+
+TEST_F(AssemblerRISCV64Test, FNegS) {
+  DriverStr(RepeatFF(&Riscv64Assembler::FNegS, "fsgnjn.s {reg1}, {reg2}, {reg2}\n"), "FNegS");
+}
+
+TEST_F(AssemblerRISCV64Test, FMvD) {
+  DriverStr(RepeatFF(&Riscv64Assembler::FMvD, "fsgnj.d {reg1}, {reg2}, {reg2}\n"), "FMvD");
+}
+
+TEST_F(AssemblerRISCV64Test, FAbsD) {
+  DriverStr(RepeatFF(&Riscv64Assembler::FAbsD, "fsgnjx.d {reg1}, {reg2}, {reg2}\n"), "FAbsD");
+}
+
+TEST_F(AssemblerRISCV64Test, FNegD) {
+  DriverStr(RepeatFF(&Riscv64Assembler::FNegD, "fsgnjn.d {reg1}, {reg2}, {reg2}\n"), "FNegD");
+}
+
+TEST_F(AssemblerRISCV64Test, Beqz) {
+  // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+  DriverStr(RepeatRIbS(&Riscv64Assembler::Beqz, -11, 2, "beq {reg}, zero, {imm}\n"), "Beqz");
+}
+
+TEST_F(AssemblerRISCV64Test, Bnez) {
+  // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+  DriverStr(RepeatRIbS(&Riscv64Assembler::Bnez, -11, 2, "bne {reg}, zero, {imm}\n"), "Bnez");
+}
+
+TEST_F(AssemblerRISCV64Test, Blez) {
+  // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+  DriverStr(RepeatRIbS(&Riscv64Assembler::Blez, -11, 2, "bge zero, {reg}, {imm}\n"), "Blez");
+}
+
+TEST_F(AssemblerRISCV64Test, Bgez) {
+  // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+  DriverStr(RepeatRIbS(&Riscv64Assembler::Bgez, -11, 2, "bge {reg}, zero, {imm}\n"), "Bgez");
+}
+
+TEST_F(AssemblerRISCV64Test, Bltz) {
+  // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+  DriverStr(RepeatRIbS(&Riscv64Assembler::Bltz, -11, 2, "blt {reg}, zero, {imm}\n"), "Bltz");
+}
+
+TEST_F(AssemblerRISCV64Test, Bgtz) {
+  // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
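+  // (Bgtz and the Bgt/Ble/Bgtu/Bleu tests that follow check the operand-swapping
+  // pseudo-instructions: e.g. `bgt rs, rt` is emitted as `blt rt, rs`.)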
+  DriverStr(RepeatRIbS(&Riscv64Assembler::Bgtz, -11, 2, "blt zero, {reg}, {imm}\n"), "Bgtz");
+}
+
+TEST_F(AssemblerRISCV64Test, Bgt) {
+  // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+  DriverStr(RepeatRRIbS(&Riscv64Assembler::Bgt, -11, 2, "blt {reg2}, {reg1}, {imm}\n"), "Bgt");
+}
+
+TEST_F(AssemblerRISCV64Test, Ble) {
+  // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+  DriverStr(RepeatRRIbS(&Riscv64Assembler::Ble, -11, 2, "bge {reg2}, {reg1}, {imm}\n"), "Ble");
+}
+
+TEST_F(AssemblerRISCV64Test, Bgtu) {
+  // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+  DriverStr(RepeatRRIbS(&Riscv64Assembler::Bgtu, -11, 2, "bltu {reg2}, {reg1}, {imm}\n"), "Bgtu");
+}
+
+TEST_F(AssemblerRISCV64Test, Bleu) {
+  // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+  DriverStr(RepeatRRIbS(&Riscv64Assembler::Bleu, -11, 2, "bgeu {reg2}, {reg1}, {imm}\n"), "Bleu");
+}
+
+TEST_F(AssemblerRISCV64Test, J) {
+  // TODO(riscv64): Change "-19, 2" to "-20, 1" for "C" Standard Extension.
+  DriverStr(RepeatIbS<int32_t>(&Riscv64Assembler::J, -19, 2, "j {imm}\n"), "J");
+}
+
+TEST_F(AssemblerRISCV64Test, JalRA) {
+  // TODO(riscv64): Change "-19, 2" to "-20, 1" for "C" Standard Extension.
+  DriverStr(RepeatIbS<int32_t>(&Riscv64Assembler::Jal, -19, 2, "jal {imm}\n"), "JalRA");
+}
+
+TEST_F(AssemblerRISCV64Test, Jr) {
+  DriverStr(RepeatR(&Riscv64Assembler::Jr, "jr {reg}\n"), "Jr");
+}
+
+TEST_F(AssemblerRISCV64Test, JalrRA) {
+  DriverStr(RepeatR(&Riscv64Assembler::Jalr, "jalr {reg}\n"), "JalrRA");
+}
+
+TEST_F(AssemblerRISCV64Test, Jalr0) {
+  DriverStr(RepeatRR(&Riscv64Assembler::Jalr, "jalr {reg1}, {reg2}\n"), "Jalr0");
+}
+
+TEST_F(AssemblerRISCV64Test, Ret) {
+  __ Ret();
+  DriverStr("ret\n", "Ret");
+}
+
+TEST_F(AssemblerRISCV64Test, RdCycle) {
+  DriverStr(RepeatR(&Riscv64Assembler::RdCycle, "rdcycle {reg}\n"), "RdCycle");
+}
+
+TEST_F(AssemblerRISCV64Test, RdTime) {
+  DriverStr(RepeatR(&Riscv64Assembler::RdTime, "rdtime {reg}\n"), "RdTime");
+}
+
+TEST_F(AssemblerRISCV64Test, RdInstret) {
+  DriverStr(RepeatR(&Riscv64Assembler::RdInstret, "rdinstret {reg}\n"), "RdInstret");
+}
+
+TEST_F(AssemblerRISCV64Test, Csrr) {
+  TestCsrrXMacro(
+      "Csrr", "csrr {reg}, {csr}", [&](uint32_t csr, XRegister rd) { __ Csrr(rd, csr); });
+}
+
+TEST_F(AssemblerRISCV64Test, Csrw) {
+  TestCsrrXMacro(
+      "Csrw", "csrw {csr}, {reg}", [&](uint32_t csr, XRegister rs) { __ Csrw(csr, rs); });
+}
+
+TEST_F(AssemblerRISCV64Test, Csrs) {
+  TestCsrrXMacro(
+      "Csrs", "csrs {csr}, {reg}", [&](uint32_t csr, XRegister rs) { __ Csrs(csr, rs); });
+}
+
+TEST_F(AssemblerRISCV64Test, Csrc) {
+  TestCsrrXMacro(
+      "Csrc", "csrc {csr}, {reg}", [&](uint32_t csr, XRegister rs) { __ Csrc(csr, rs); });
+}
+
+TEST_F(AssemblerRISCV64Test, Csrwi) {
+  TestCsrrXiMacro(
+      "Csrwi", "csrwi {csr}, {uimm}", [&](uint32_t csr, uint32_t uimm) { __ Csrwi(csr, uimm); });
+}
+
+TEST_F(AssemblerRISCV64Test, Csrsi) {
+  TestCsrrXiMacro(
+      "Csrsi", "csrsi {csr}, {uimm}", [&](uint32_t csr, uint32_t uimm) { __ Csrsi(csr, uimm); });
+}
+
+TEST_F(AssemblerRISCV64Test, Csrci) {
+  TestCsrrXiMacro(
+      "Csrci", "csrci {csr}, {uimm}", [&](uint32_t csr, uint32_t uimm) { __ Csrci(csr, uimm); });
+}
+
+TEST_F(AssemblerRISCV64Test, LoadConst32) {
+  // `LoadConst32()` emits the same code sequences as `Li()` for 32-bit values.
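+  // Excluding both scratch registers below presumably also verifies that a 32-bit
+  // constant never needs a temporary: a LUI + ADDIW style pair (or a single
+  // instruction) per value is enough.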
+  ScratchRegisterScope srs(GetAssembler());
+  srs.ExcludeXRegister(TMP);
+  srs.ExcludeXRegister(TMP2);
+  DriverStr(RepeatRIb(&Riscv64Assembler::LoadConst32, -32, "li {reg}, {imm}"), "LoadConst32");
+}
+
+TEST_F(AssemblerRISCV64Test, LoadConst64) {
+  SetUseSimpleMarch(true);
+  TestLoadConst64("LoadConst64",
+                  /*can_use_tmp=*/ true,
+                  [&](XRegister rd, int64_t value) { __ LoadConst64(rd, value); });
+}
+
+TEST_F(AssemblerRISCV64Test, AddConst32) {
+  auto emit_op = [&](XRegister rd, XRegister rs1, int64_t value) {
+    __ AddConst32(rd, rs1, dchecked_integral_cast<int32_t>(value));
+  };
+  TestAddConst("AddConst32", 32, /*suffix=*/ "w", emit_op);
+}
+
+TEST_F(AssemblerRISCV64Test, AddConst64) {
+  SetUseSimpleMarch(true);
+  auto emit_op = [&](XRegister rd, XRegister rs1, int64_t value) {
+    __ AddConst64(rd, rs1, value);
+  };
+  TestAddConst("AddConst64", 64, /*suffix=*/ "", emit_op);
+}
+
+TEST_F(AssemblerRISCV64Test, BcondForward3KiB) {
+  TestBcondForward("BcondForward3KiB", 3 * KB, "1", GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BcondForward3KiBBare) {
+  TestBcondForward("BcondForward3KiBBare", 3 * KB, "1", GetPrintBcond(), /*is_bare=*/ true);
+}
+
+TEST_F(AssemblerRISCV64Test, BcondBackward3KiB) {
+  TestBcondBackward("BcondBackward3KiB", 3 * KB, "1", GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BcondBackward3KiBBare) {
+  TestBcondBackward("BcondBackward3KiBBare", 3 * KB, "1", GetPrintBcond(), /*is_bare=*/ true);
+}
+
+TEST_F(AssemblerRISCV64Test, BcondForward5KiB) {
+  TestBcondForward("BcondForward5KiB", 5 * KB, "1", GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BcondBackward5KiB) {
+  TestBcondBackward("BcondBackward5KiB", 5 * KB, "1", GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BcondForward2MiB) {
+  TestBcondForward("BcondForward2MiB", 2 * MB, "1", GetPrintBcondOppositeAndTail("2", "3"));
+}
+
+TEST_F(AssemblerRISCV64Test, BcondBackward2MiB) {
+  TestBcondBackward("BcondBackward2MiB", 2 * MB, "1", GetPrintBcondOppositeAndTail("2", "3"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset13Forward) {
+  TestBeqA0A1Forward("BeqA0A1MaxOffset13Forward",
+                     MaxOffset13ForwardDistance() - /*BEQ*/ 4u,
+                     "1",
+                     GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset13ForwardBare) {
+  TestBeqA0A1Forward("BeqA0A1MaxOffset13ForwardBare",
+                     MaxOffset13ForwardDistance() - /*BEQ*/ 4u,
+                     "1",
+                     GetPrintBcond(),
+                     /*is_bare=*/ true);
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset13Backward) {
+  TestBeqA0A1Backward("BeqA0A1MaxOffset13Backward",
+                      MaxOffset13BackwardDistance(),
+                      "1",
+                      GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset13BackwardBare) {
+  TestBeqA0A1Backward("BeqA0A1MaxOffset13BackwardBare",
+                      MaxOffset13BackwardDistance(),
+                      "1",
+                      GetPrintBcond(),
+                      /*is_bare=*/ true);
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset13Forward) {
+  TestBeqA0A1Forward("BeqA0A1OverMaxOffset13Forward",
+                     MaxOffset13ForwardDistance() - /*BEQ*/ 4u + /*Exceed max*/ 4u,
+                     "1",
+                     GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset13Backward) {
+  TestBeqA0A1Backward("BeqA0A1OverMaxOffset13Backward",
+                      MaxOffset13BackwardDistance() + /*Exceed max*/ 4u,
+                      "1",
+                      GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset21Forward) {
+  TestBeqA0A1Forward("BeqA0A1MaxOffset21Forward",
+                     MaxOffset21ForwardDistance() - /*J*/ 4u,
+                     "1",
+                     GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset21Backward) {
+  TestBeqA0A1Backward("BeqA0A1MaxOffset21Backward",
+                      MaxOffset21BackwardDistance() - /*BNE*/ 4u,
+                      "1",
+                      GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset21Forward) {
+  TestBeqA0A1Forward("BeqA0A1OverMaxOffset21Forward",
+                     MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u,
+                     "1",
+                     GetPrintBcondOppositeAndTail("2", "3"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset21Backward) {
+  TestBeqA0A1Backward("BeqA0A1OverMaxOffset21Backward",
+                      MaxOffset21BackwardDistance() - /*BNE*/ 4u + /*Exceed max*/ 4u,
+                      "1",
+                      GetPrintBcondOppositeAndTail("2", "3"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1AlmostCascade) {
+  TestBeqA0A1MaybeCascade("BeqA0A1AlmostCascade", /*cascade=*/ false, GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1Cascade) {
+  TestBeqA0A1MaybeCascade(
+      "BeqA0A1Cascade", /*cascade=*/ true, GetPrintBcondOppositeAndJ("1"));
+}
+
+TEST_F(AssemblerRISCV64Test, BcondElimination) {
+  Riscv64Label label;
+  __ Bind(&label);
+  __ Nop();
+  for (XRegister reg : GetRegisters()) {
+    __ Bne(reg, reg, &label);
+    __ Blt(reg, reg, &label);
+    __ Bgt(reg, reg, &label);
+    __ Bltu(reg, reg, &label);
+    __ Bgtu(reg, reg, &label);
+  }
+  DriverStr("nop\n", "BcondElimination");
+}
+
+TEST_F(AssemblerRISCV64Test, BcondUnconditional) {
+  Riscv64Label label;
+  __ Bind(&label);
+  __ Nop();
+  for (XRegister reg : GetRegisters()) {
+    __ Beq(reg, reg, &label);
+    __ Bge(reg, reg, &label);
+    __ Ble(reg, reg, &label);
+    __ Bleu(reg, reg, &label);
+    __ Bgeu(reg, reg, &label);
+  }
+  std::string expected =
+      "1:\n"
+      "nop\n" +
+      RepeatInsn(5u * GetRegisters().size(), "j 1b\n", []() {});
+  DriverStr(expected, "BcondUnconditional");
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdForward3KiB) {
+  TestJalRdForward("JalRdForward3KiB", 3 * KB, "1", GetPrintJalRd());
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdForward3KiBBare) {
+  TestJalRdForward("JalRdForward3KiBBare", 3 * KB, "1", GetPrintJalRd(), /*is_bare=*/ true);
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdBackward3KiB) {
+  TestJalRdBackward("JalRdBackward3KiB", 3 * KB, "1", GetPrintJalRd());
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdBackward3KiBBare) {
+  TestJalRdBackward("JalRdBackward3KiBBare", 3 * KB, "1", GetPrintJalRd(), /*is_bare=*/ true);
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdForward2MiB) {
+  TestJalRdForward("JalRdForward2MiB", 2 * MB, "1", GetPrintCallRd("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdBackward2MiB) {
+  TestJalRdBackward("JalRdBackward2MiB", 2 * MB, "1", GetPrintCallRd("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JForward3KiB) {
+  TestBuncondForward("JForward3KiB", 3 * KB, "1", GetEmitJ(), GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JForward3KiBBare) {
+  TestBuncondForward("JForward3KiBBare", 3 * KB, "1", GetEmitJ(/*is_bare=*/ true), GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JBackward3KiB) {
+  TestBuncondBackward("JBackward3KiB", 3 * KB, "1", GetEmitJ(), GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JBackward3KiBBare) {
+  TestBuncondBackward("JBackward3KiBBare", 3 * KB, "1", GetEmitJ(/*is_bare=*/ true), GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JForward2MiB) {
+  TestBuncondForward("JForward2MiB", 2 * MB, "1", GetEmitJ(), GetPrintTail("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JBackward2MiB) {
+  TestBuncondBackward("JBackward2MiB", 2 * MB, "1", GetEmitJ(), GetPrintTail("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JMaxOffset21Forward) {
+  TestBuncondForward("JMaxOffset21Forward",
+                     MaxOffset21ForwardDistance() - /*J*/ 4u,
+                     "1",
+                     GetEmitJ(),
+                     GetPrintJ());
+}
+
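+// As a sketch of the arithmetic above, assuming offsets are measured from the branch
+// instruction itself, the distance helpers would look like:
+//   constexpr uint32_t MaxOffset21ForwardDistance() { return (1u << 20) - 2u; }
+//   constexpr uint32_t MaxOffset21BackwardDistance() { return 1u << 20; }
+// i.e. the 21-bit signed, 2-byte-aligned J/JAL range. The forward tests subtract the
+// 4-byte J/JAL itself and the "OverMax" tests add 4u to force expansion to the
+// AUIPC-based "tail"/"call" sequence.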
+TEST_F(AssemblerRISCV64Test, JMaxOffset21ForwardBare) {
+  TestBuncondForward("JMaxOffset21ForwardBare",
+                     MaxOffset21ForwardDistance() - /*J*/ 4u,
+                     "1",
+                     GetEmitJ(/*is_bare=*/ true),
+                     GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JMaxOffset21Backward) {
+  TestBuncondBackward("JMaxOffset21Backward",
+                      MaxOffset21BackwardDistance(),
+                      "1",
+                      GetEmitJ(),
+                      GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JMaxOffset21BackwardBare) {
+  TestBuncondBackward("JMaxOffset21BackwardBare",
+                      MaxOffset21BackwardDistance(),
+                      "1",
+                      GetEmitJ(/*is_bare=*/ true),
+                      GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JOverMaxOffset21Forward) {
+  TestBuncondForward("JOverMaxOffset21Forward",
+                     MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u,
+                     "1",
+                     GetEmitJ(),
+                     GetPrintTail("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JOverMaxOffset21Backward) {
+  TestBuncondBackward("JOverMaxOffset21Backward",
+                      MaxOffset21BackwardDistance() + /*Exceed max*/ 4u,
+                      "1",
+                      GetEmitJ(),
+                      GetPrintTail("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, CallForward3KiB) {
+  TestBuncondForward("CallForward3KiB", 3 * KB, "1", GetEmitJal(), GetPrintJal());
+}
+
+TEST_F(AssemblerRISCV64Test, CallBackward3KiB) {
+  TestBuncondBackward("CallBackward3KiB", 3 * KB, "1", GetEmitJal(), GetPrintJal());
+}
+
+TEST_F(AssemblerRISCV64Test, CallForward2MiB) {
+  TestBuncondForward("CallForward2MiB", 2 * MB, "1", GetEmitJal(), GetPrintCall("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, CallBackward2MiB) {
+  TestBuncondBackward("CallBackward2MiB", 2 * MB, "1", GetEmitJal(), GetPrintCall("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, CallMaxOffset21Forward) {
+  TestBuncondForward("CallMaxOffset21Forward",
+                     MaxOffset21ForwardDistance() - /*J*/ 4u,
+                     "1",
+                     GetEmitJal(),
+                     GetPrintJal());
+}
+
+TEST_F(AssemblerRISCV64Test, CallMaxOffset21Backward) {
+  TestBuncondBackward("CallMaxOffset21Backward",
+                      MaxOffset21BackwardDistance(),
+                      "1",
+                      GetEmitJal(),
+                      GetPrintJal());
+}
+
+TEST_F(AssemblerRISCV64Test, CallOverMaxOffset21Forward) {
+  TestBuncondForward("CallOverMaxOffset21Forward",
+                     MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u,
+                     "1",
+                     GetEmitJal(),
+                     GetPrintCall("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, CallOverMaxOffset21Backward) {
+  TestBuncondBackward("CallOverMaxOffset21Backward",
+                      MaxOffset21BackwardDistance() + /*Exceed max*/ 4u,
+                      "1",
+                      GetEmitJal(),
+                      GetPrintCall("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, Loadb) {
+  TestLoadStoreArbitraryOffset("Loadb", "lb", &Riscv64Assembler::Loadb, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Loadh) {
+  TestLoadStoreArbitraryOffset("Loadh", "lh", &Riscv64Assembler::Loadh, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Loadw) {
+  TestLoadStoreArbitraryOffset("Loadw", "lw", &Riscv64Assembler::Loadw, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Loadd) {
+  TestLoadStoreArbitraryOffset("Loadd", "ld", &Riscv64Assembler::Loadd, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Loadbu) {
+  TestLoadStoreArbitraryOffset("Loadbu", "lbu", &Riscv64Assembler::Loadbu, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Loadhu) {
+  TestLoadStoreArbitraryOffset("Loadhu", "lhu", &Riscv64Assembler::Loadhu, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Loadwu) {
+  TestLoadStoreArbitraryOffset("Loadwu", "lwu", &Riscv64Assembler::Loadwu, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Storeb) {
+  TestLoadStoreArbitraryOffset("Storeb", "sb", &Riscv64Assembler::Storeb, /*is_store=*/ true);
+}
+
+TEST_F(AssemblerRISCV64Test, Storeh) {
TestLoadStoreArbitraryOffset("Storeh", "sh", &Riscv64Assembler::Storeh, /*is_store=*/ true); +} + +TEST_F(AssemblerRISCV64Test, Storew) { + TestLoadStoreArbitraryOffset("Storew", "sw", &Riscv64Assembler::Storew, /*is_store=*/ true); +} + +TEST_F(AssemblerRISCV64Test, Stored) { + TestLoadStoreArbitraryOffset("Stored", "sd", &Riscv64Assembler::Stored, /*is_store=*/ true); +} + +TEST_F(AssemblerRISCV64Test, FLoadw) { + TestFPLoadStoreArbitraryOffset("FLoadw", "flw", &Riscv64Assembler::FLoadw); +} + +TEST_F(AssemblerRISCV64Test, FLoadd) { + TestFPLoadStoreArbitraryOffset("FLoadd", "fld", &Riscv64Assembler::FLoadd); +} + +TEST_F(AssemblerRISCV64Test, FStorew) { + TestFPLoadStoreArbitraryOffset("FStorew", "fsw", &Riscv64Assembler::FStorew); +} + +TEST_F(AssemblerRISCV64Test, FStored) { + TestFPLoadStoreArbitraryOffset("FStored", "fsd", &Riscv64Assembler::FStored); +} + +TEST_F(AssemblerRISCV64Test, Unimp) { + __ Unimp(); + DriverStr("unimp\n", "Unimp"); +} + +TEST_F(AssemblerRISCV64Test, LoadLabelAddress) { + std::string expected; + constexpr size_t kNumLoadsForward = 4 * KB; + constexpr size_t kNumLoadsBackward = 4 * KB; + Riscv64Label label; + auto emit_batch = [&](size_t num_loads, const std::string& target_label) { + for (size_t i = 0; i != num_loads; ++i) { + // Cycle through non-Zero registers. + XRegister rd = enum_cast<XRegister>((i % (kNumberOfXRegisters - 1)) + 1); + DCHECK_NE(rd, Zero); + std::string rd_name = GetRegisterName(rd); + __ LoadLabelAddress(rd, &label); + expected += "1:\n"; + expected += ART_FORMAT("auipc {}, %pcrel_hi({})\n", rd_name, target_label); + expected += ART_FORMAT("addi {}, {}, %pcrel_lo(1b)\n", rd_name, rd_name); + } + }; + emit_batch(kNumLoadsForward, "2f"); + __ Bind(&label); + expected += "2:\n"; + emit_batch(kNumLoadsBackward, "2b"); + DriverStr(expected, "LoadLabelAddress"); +} + +TEST_F(AssemblerRISCV64Test, LoadLiteralWithPaddingForLong) { + TestLoadLiteral("LoadLiteralWithPaddingForLong", /*with_padding_for_long=*/ true); +} + +TEST_F(AssemblerRISCV64Test, LoadLiteralWithoutPaddingForLong) { + TestLoadLiteral("LoadLiteralWithoutPaddingForLong", /*with_padding_for_long=*/ false); +} + +TEST_F(AssemblerRISCV64Test, JumpTable) { + std::string expected; + expected += EmitNops(sizeof(uint32_t)); + Riscv64Label targets[4]; + uint32_t target_locations[4]; + JumpTable* jump_table = __ CreateJumpTable(ArenaVector<Riscv64Label*>( + {&targets[0], &targets[1], &targets[2], &targets[3]}, __ GetAllocator()->Adapter())); + for (size_t i : {0, 1, 2, 3}) { + target_locations[i] = __ CodeSize(); + __ Bind(&targets[i]); + expected += std::to_string(i) + ":\n"; + expected += EmitNops(sizeof(uint32_t)); + } + __ LoadLabelAddress(A0, jump_table->GetLabel()); + expected += "4:\n" + "auipc a0, %pcrel_hi(5f)\n" + "addi a0, a0, %pcrel_lo(4b)\n"; + expected += EmitNops(sizeof(uint32_t)); + uint32_t label5_location = __ CodeSize(); + auto target_offset = [&](size_t i) { + // Even with `-mno-relax`, clang assembler does not fully resolve `.4byte 0b - 5b` + // and emits a relocation, so we need to calculate target offsets ourselves. 
+ return std::to_string(static_cast<int64_t>(target_locations[i] - label5_location)); + }; + expected += "5:\n" + ".4byte " + target_offset(0) + "\n" + ".4byte " + target_offset(1) + "\n" + ".4byte " + target_offset(2) + "\n" + ".4byte " + target_offset(3) + "\n"; + DriverStr(expected, "JumpTable"); +} + +TEST_F(AssemblerRISCV64Test, ScratchRegisters) { + ScratchRegisterScope srs(GetAssembler()); + ASSERT_EQ(2u, srs.AvailableXRegisters()); // Default: TMP(T6) and TMP2(T5). + ASSERT_EQ(1u, srs.AvailableFRegisters()); // Default: FTMP(FT11). + + XRegister tmp = srs.AllocateXRegister(); + EXPECT_EQ(TMP, tmp); + XRegister tmp2 = srs.AllocateXRegister(); + EXPECT_EQ(TMP2, tmp2); + ASSERT_EQ(0u, srs.AvailableXRegisters()); + + FRegister ftmp = srs.AllocateFRegister(); + EXPECT_EQ(FTMP, ftmp); + ASSERT_EQ(0u, srs.AvailableFRegisters()); + + // Test nesting. + srs.FreeXRegister(A0); + srs.FreeXRegister(A1); + srs.FreeFRegister(FA0); + srs.FreeFRegister(FA1); + ASSERT_EQ(2u, srs.AvailableXRegisters()); + ASSERT_EQ(2u, srs.AvailableFRegisters()); + { + ScratchRegisterScope srs2(GetAssembler()); + ASSERT_EQ(2u, srs2.AvailableXRegisters()); + ASSERT_EQ(2u, srs2.AvailableFRegisters()); + XRegister a1 = srs2.AllocateXRegister(); + EXPECT_EQ(A1, a1); + XRegister a0 = srs2.AllocateXRegister(); + EXPECT_EQ(A0, a0); + ASSERT_EQ(0u, srs2.AvailableXRegisters()); + FRegister fa1 = srs2.AllocateFRegister(); + EXPECT_EQ(FA1, fa1); + FRegister fa0 = srs2.AllocateFRegister(); + EXPECT_EQ(FA0, fa0); + ASSERT_EQ(0u, srs2.AvailableFRegisters()); + } + ASSERT_EQ(2u, srs.AvailableXRegisters()); + ASSERT_EQ(2u, srs.AvailableFRegisters()); + + srs.IncludeXRegister(A0); // No-op as the register was already available. + ASSERT_EQ(2u, srs.AvailableXRegisters()); + srs.IncludeFRegister(FA0); // No-op as the register was already available. + ASSERT_EQ(2u, srs.AvailableFRegisters()); + srs.IncludeXRegister(S0); + ASSERT_EQ(3u, srs.AvailableXRegisters()); + srs.IncludeFRegister(FS0); + ASSERT_EQ(3u, srs.AvailableFRegisters()); + + srs.ExcludeXRegister(S1); // No-op as the register was not available. + ASSERT_EQ(3u, srs.AvailableXRegisters()); + srs.ExcludeFRegister(FS1); // No-op as the register was not available. + ASSERT_EQ(3u, srs.AvailableFRegisters()); + srs.ExcludeXRegister(A0); + ASSERT_EQ(2u, srs.AvailableXRegisters()); + srs.ExcludeFRegister(FA0); + ASSERT_EQ(2u, srs.AvailableFRegisters()); +} + +#undef __ + +} // namespace riscv64 +} // namespace art diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc b/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc new file mode 100644 index 0000000000..9d3a29d252 --- /dev/null +++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc @@ -0,0 +1,646 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "jni_macro_assembler_riscv64.h" + +#include "base/bit_utils_iterator.h" +#include "dwarf/register.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "gc_root.h" +#include "indirect_reference_table.h" +#include "lock_word.h" +#include "managed_register_riscv64.h" +#include "offsets.h" +#include "stack_reference.h" +#include "thread.h" + +namespace art HIDDEN { +namespace riscv64 { + +static constexpr size_t kSpillSize = 8; // Both GPRs and FPRs + +static std::pair<uint32_t, uint32_t> GetCoreAndFpSpillMasks( + ArrayRef<const ManagedRegister> callee_save_regs) { + uint32_t core_spill_mask = 0u; + uint32_t fp_spill_mask = 0u; + for (ManagedRegister r : callee_save_regs) { + Riscv64ManagedRegister reg = r.AsRiscv64(); + if (reg.IsXRegister()) { + core_spill_mask |= 1u << reg.AsXRegister(); + } else { + DCHECK(reg.IsFRegister()); + fp_spill_mask |= 1u << reg.AsFRegister(); + } + } + DCHECK_EQ(callee_save_regs.size(), + dchecked_integral_cast<size_t>(POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask))); + return {core_spill_mask, fp_spill_mask}; +} + +#define __ asm_. + +Riscv64JNIMacroAssembler::~Riscv64JNIMacroAssembler() { +} + +void Riscv64JNIMacroAssembler::FinalizeCode() { + __ FinalizeCode(); +} + +void Riscv64JNIMacroAssembler::BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs) { + // Increase frame to required size. + DCHECK_ALIGNED(frame_size, kStackAlignment); + // Must at least have space for Method* if we're going to spill it. + DCHECK_GE(frame_size, + (callee_save_regs.size() + (method_reg.IsRegister() ? 1u : 0u)) * kSpillSize); + IncreaseFrameSize(frame_size); + + // Save callee-saves. + auto [core_spill_mask, fp_spill_mask] = GetCoreAndFpSpillMasks(callee_save_regs); + size_t offset = frame_size; + if ((core_spill_mask & (1u << RA)) != 0u) { + offset -= kSpillSize; + __ Stored(RA, SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Core(RA), offset); + } + for (uint32_t reg : HighToLowBits(core_spill_mask & ~(1u << RA))) { + offset -= kSpillSize; + __ Stored(enum_cast<XRegister>(reg), SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Core(enum_cast<XRegister>(reg)), offset); + } + for (uint32_t reg : HighToLowBits(fp_spill_mask)) { + offset -= kSpillSize; + __ FStored(enum_cast<FRegister>(reg), SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Fp(enum_cast<FRegister>(reg)), offset); + } + + if (method_reg.IsRegister()) { + // Write ArtMethod*. + DCHECK_EQ(A0, method_reg.AsRiscv64().AsXRegister()); + __ Stored(A0, SP, 0); + } +} + +void Riscv64JNIMacroAssembler::RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> callee_save_regs, + [[maybe_unused]] bool may_suspend) { + cfi().RememberState(); + + // Restore callee-saves. 
+ auto [core_spill_mask, fp_spill_mask] = GetCoreAndFpSpillMasks(callee_save_regs); + size_t offset = frame_size - callee_save_regs.size() * kSpillSize; + for (uint32_t reg : LowToHighBits(fp_spill_mask)) { + __ FLoadd(enum_cast<FRegister>(reg), SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Fp(enum_cast<FRegister>(reg))); + offset += kSpillSize; + } + for (uint32_t reg : LowToHighBits(core_spill_mask & ~(1u << RA))) { + __ Loadd(enum_cast<XRegister>(reg), SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Core(enum_cast<XRegister>(reg))); + offset += kSpillSize; + } + if ((core_spill_mask & (1u << RA)) != 0u) { + __ Loadd(RA, SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Core(RA)); + offset += kSpillSize; + } + DCHECK_EQ(offset, frame_size); + + // Decrease the frame size. + DecreaseFrameSize(frame_size); + + // Return to RA. + __ Ret(); + + // The CFI should be restored for any code that follows the exit block. + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(frame_size); +} + +void Riscv64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) { + if (adjust != 0u) { + CHECK_ALIGNED(adjust, kStackAlignment); + int64_t adjustment = dchecked_integral_cast<int64_t>(adjust); + __ AddConst64(SP, SP, -adjustment); + __ cfi().AdjustCFAOffset(adjustment); + } +} + +void Riscv64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) { + if (adjust != 0u) { + CHECK_ALIGNED(adjust, kStackAlignment); + int64_t adjustment = dchecked_integral_cast<int64_t>(adjust); + __ AddConst64(SP, SP, adjustment); + __ cfi().AdjustCFAOffset(-adjustment); + } +} + +ManagedRegister Riscv64JNIMacroAssembler::CoreRegisterWithSize(ManagedRegister src, size_t size) { + DCHECK(src.AsRiscv64().IsXRegister()); + DCHECK(size == 4u || size == 8u) << size; + return src; +} + +void Riscv64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) { + Store(Riscv64ManagedRegister::FromXRegister(SP), MemberOffset(offs.Int32Value()), m_src, size); +} + +void Riscv64JNIMacroAssembler::Store(ManagedRegister m_base, + MemberOffset offs, + ManagedRegister m_src, + size_t size) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + Riscv64ManagedRegister src = m_src.AsRiscv64(); + if (src.IsXRegister()) { + if (size == 4u) { + __ Storew(src.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + } else { + CHECK_EQ(8u, size); + __ Stored(src.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + } + } else { + CHECK(src.IsFRegister()) << src; + if (size == 4u) { + __ FStorew(src.AsFRegister(), base.AsXRegister(), offs.Int32Value()); + } else { + CHECK_EQ(8u, size); + __ FStored(src.AsFRegister(), base.AsXRegister(), offs.Int32Value()); + } + } +} + +void Riscv64JNIMacroAssembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) { + Riscv64ManagedRegister sp = Riscv64ManagedRegister::FromXRegister(SP); + Store(sp, MemberOffset(offs.Int32Value()), m_src, static_cast<size_t>(kRiscv64PointerSize)); +} + +void Riscv64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 offs, bool tag_sp) { + XRegister src = SP; + ScratchRegisterScope srs(&asm_); + if (tag_sp) { + XRegister tmp = srs.AllocateXRegister(); + __ Ori(tmp, SP, 0x2); + src = tmp; + } + __ Stored(src, TR, offs.Int32Value()); +} + +void Riscv64JNIMacroAssembler::Load(ManagedRegister m_dest, FrameOffset offs, size_t size) { + Riscv64ManagedRegister sp = Riscv64ManagedRegister::FromXRegister(SP); + Load(m_dest, sp, MemberOffset(offs.Int32Value()), size); +} + +void Riscv64JNIMacroAssembler::Load(ManagedRegister m_dest, + ManagedRegister m_base, 
+ MemberOffset offs, + size_t size) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + Riscv64ManagedRegister dest = m_dest.AsRiscv64(); + if (dest.IsXRegister()) { + if (size == 4u) { + // The riscv64 native calling convention specifies that integers narrower than XLEN (64) + // bits are "widened according to the sign of their type up to 32 bits, then sign-extended + // to XLEN bits." The managed ABI already passes integral values this way in registers + // and correctly widened to 32 bits on the stack. The `Load()` must sign-extend narrower + // types here to pass integral values correctly to the native call. + // For `float` args, the upper 32 bits are undefined, so this is fine for them as well. + __ Loadw(dest.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + } else { + CHECK_EQ(8u, size); + __ Loadd(dest.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + } + } else { + CHECK(dest.IsFRegister()) << dest; + if (size == 4u) { + __ FLoadw(dest.AsFRegister(), base.AsXRegister(), offs.Int32Value()); + } else { + CHECK_EQ(8u, size); + __ FLoadd(dest.AsFRegister(), base.AsXRegister(), offs.Int32Value()); + } + } +} + +void Riscv64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dest, ThreadOffset64 offs) { + Riscv64ManagedRegister tr = Riscv64ManagedRegister::FromXRegister(TR); + Load(m_dest, tr, MemberOffset(offs.Int32Value()), static_cast<size_t>(kRiscv64PointerSize)); +} + +void Riscv64JNIMacroAssembler::LoadGcRootWithoutReadBarrier(ManagedRegister m_dest, + ManagedRegister m_base, + MemberOffset offs) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + Riscv64ManagedRegister dest = m_dest.AsRiscv64(); + static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>)); + __ Loadwu(dest.AsXRegister(), base.AsXRegister(), offs.Int32Value()); +} + +void Riscv64JNIMacroAssembler::LoadStackReference(ManagedRegister m_dest, FrameOffset offs) { + // `StackReference<>` and `GcRoot<>` have the same underlying representation, namely + // `CompressedReference<>`. And `StackReference<>` does not need a read barrier. + static_assert(sizeof(uint32_t) == sizeof(mirror::CompressedReference<mirror::Object>)); + static_assert(sizeof(uint32_t) == sizeof(StackReference<mirror::Object>)); + static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>)); + LoadGcRootWithoutReadBarrier( + m_dest, Riscv64ManagedRegister::FromXRegister(SP), MemberOffset(offs.Int32Value())); +} + +void Riscv64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, + ArrayRef<ArgumentLocation> srcs, + ArrayRef<FrameOffset> refs) { + size_t arg_count = dests.size(); + DCHECK_EQ(arg_count, srcs.size()); + DCHECK_EQ(arg_count, refs.size()); + + auto get_mask = [](ManagedRegister reg) -> uint64_t { + Riscv64ManagedRegister riscv64_reg = reg.AsRiscv64(); + if (riscv64_reg.IsXRegister()) { + size_t core_reg_number = static_cast<size_t>(riscv64_reg.AsXRegister()); + DCHECK_LT(core_reg_number, 32u); + return UINT64_C(1) << core_reg_number; + } else { + DCHECK(riscv64_reg.IsFRegister()); + size_t fp_reg_number = static_cast<size_t>(riscv64_reg.AsFRegister()); + DCHECK_LT(fp_reg_number, 32u); + return (UINT64_C(1) << 32u) << fp_reg_number; + } + }; + + // Collect registers to move while storing/copying args to stack slots. + // Convert processed references to `jobject`. 
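+ // [Editorial aside, hedged; not part of the original change] `get_mask()` above packs
+ // both register files into one `uint64_t`: GPR n maps to bit n and FPR n to bit
+ // 32 + n (e.g. A0 = x10 -> bit 10, FA0 = f10 -> bit 42), so a single pair of
+ // `src_regs`/`dest_regs` masks can track pending moves across both files. The first
+ // loop below stores all stack-destined args immediately and records masks only for
+ // register-to-register moves; the second loop then fills destination registers,
+ // deferring any destination that is still a pending source. It terminates because the
+ // managed-to-native mapping creates no move cycles, as the
+ // `CHECK_NE(old_dest_regs, dest_regs)` asserts on every pass.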
+ uint64_t src_regs = 0u; + uint64_t dest_regs = 0u; + for (size_t i = 0; i != arg_count; ++i) { + const ArgumentLocation& src = srcs[i]; + const ArgumentLocation& dest = dests[i]; + const FrameOffset ref = refs[i]; + if (ref != kInvalidReferenceOffset) { + DCHECK_EQ(src.GetSize(), kObjectReferenceSize); + DCHECK_EQ(dest.GetSize(), static_cast<size_t>(kRiscv64PointerSize)); + } else { + DCHECK(src.GetSize() == 4u || src.GetSize() == 8u) << src.GetSize(); + DCHECK(dest.GetSize() == 4u || dest.GetSize() == 8u) << dest.GetSize(); + DCHECK_LE(src.GetSize(), dest.GetSize()); + } + if (dest.IsRegister()) { + if (src.IsRegister() && src.GetRegister().Equals(dest.GetRegister())) { + // No move is necessary but we may need to convert a reference to a `jobject`. + if (ref != kInvalidReferenceOffset) { + CreateJObject(dest.GetRegister(), ref, src.GetRegister(), /*null_allowed=*/ i != 0u); + } + } else { + if (src.IsRegister()) { + src_regs |= get_mask(src.GetRegister()); + } + dest_regs |= get_mask(dest.GetRegister()); + } + } else { + ScratchRegisterScope srs(&asm_); + Riscv64ManagedRegister reg = src.IsRegister() + ? src.GetRegister().AsRiscv64() + : Riscv64ManagedRegister::FromXRegister(srs.AllocateXRegister()); + if (!src.IsRegister()) { + if (ref != kInvalidReferenceOffset) { + // We're loading the reference only for comparison with null, so it does not matter + // if we sign- or zero-extend but let's correctly zero-extend the reference anyway. + __ Loadwu(reg.AsRiscv64().AsXRegister(), SP, src.GetFrameOffset().SizeValue()); + } else { + Load(reg, src.GetFrameOffset(), src.GetSize()); + } + } + if (ref != kInvalidReferenceOffset) { + DCHECK_NE(i, 0u); + CreateJObject(reg, ref, reg, /*null_allowed=*/ true); + } + Store(dest.GetFrameOffset(), reg, dest.GetSize()); + } + } + + // Fill destination registers. + // There should be no cycles, so this simple algorithm should make progress. + while (dest_regs != 0u) { + uint64_t old_dest_regs = dest_regs; + for (size_t i = 0; i != arg_count; ++i) { + const ArgumentLocation& src = srcs[i]; + const ArgumentLocation& dest = dests[i]; + const FrameOffset ref = refs[i]; + if (!dest.IsRegister()) { + continue; // Stored in first loop above. + } + uint64_t dest_reg_mask = get_mask(dest.GetRegister()); + if ((dest_reg_mask & dest_regs) == 0u) { + continue; // Equals source, or already filled in one of previous iterations. + } + if ((dest_reg_mask & src_regs) != 0u) { + continue; // Cannot clobber this register yet. + } + if (src.IsRegister()) { + if (ref != kInvalidReferenceOffset) { + DCHECK_NE(i, 0u); // The `this` arg remains in the same register (handled above). + CreateJObject(dest.GetRegister(), ref, src.GetRegister(), /*null_allowed=*/ true); + } else { + Move(dest.GetRegister(), src.GetRegister(), dest.GetSize()); + } + src_regs &= ~get_mask(src.GetRegister()); // Allow clobbering source register. + } else { + Load(dest.GetRegister(), src.GetFrameOffset(), src.GetSize()); + // No `jobject` conversion needed. There are enough arg registers in managed ABI + // to hold all references that yield a register arg `jobject` in native ABI. + DCHECK_EQ(ref, kInvalidReferenceOffset); + } + dest_regs &= ~get_mask(dest.GetRegister()); // Destination register was filled. + } + CHECK_NE(old_dest_regs, dest_regs); + DCHECK_EQ(0u, dest_regs & ~old_dest_regs); + } +} + +void Riscv64JNIMacroAssembler::Move(ManagedRegister m_dest, ManagedRegister m_src, size_t size) { + // Note: This function is used only for moving between GPRs. 
+ // FP argument registers hold the same arguments in managed and native ABIs. + DCHECK(size == 4u || size == 8u) << size; + Riscv64ManagedRegister dest = m_dest.AsRiscv64(); + Riscv64ManagedRegister src = m_src.AsRiscv64(); + DCHECK(dest.IsXRegister()); + DCHECK(src.IsXRegister()); + if (!dest.Equals(src)) { + __ Mv(dest.AsXRegister(), src.AsXRegister()); + } +} + +void Riscv64JNIMacroAssembler::Move(ManagedRegister m_dest, size_t value) { + DCHECK(m_dest.AsRiscv64().IsXRegister()); + __ LoadConst64(m_dest.AsRiscv64().AsXRegister(), dchecked_integral_cast<int64_t>(value)); +} + +void Riscv64JNIMacroAssembler::SignExtend([[maybe_unused]] ManagedRegister mreg, + [[maybe_unused]] size_t size) { + LOG(FATAL) << "The result is already sign-extended in the native ABI."; + UNREACHABLE(); +} + +void Riscv64JNIMacroAssembler::ZeroExtend([[maybe_unused]] ManagedRegister mreg, + [[maybe_unused]] size_t size) { + LOG(FATAL) << "The result is already zero-extended in the native ABI."; + UNREACHABLE(); +} + +void Riscv64JNIMacroAssembler::GetCurrentThread(ManagedRegister dest) { + DCHECK(dest.AsRiscv64().IsXRegister()); + __ Mv(dest.AsRiscv64().AsXRegister(), TR); +} + +void Riscv64JNIMacroAssembler::GetCurrentThread(FrameOffset offset) { + __ Stored(TR, SP, offset.Int32Value()); +} + +void Riscv64JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister m_reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) { + // This implements the fast-path of `Thread::DecodeJObject()`. + constexpr int64_t kGlobalOrWeakGlobalMask = IndirectReferenceTable::GetGlobalOrWeakGlobalMask(); + DCHECK(IsInt<12>(kGlobalOrWeakGlobalMask)); + constexpr int64_t kIndirectRefKindMask = IndirectReferenceTable::GetIndirectRefKindMask(); + DCHECK(IsInt<12>(kIndirectRefKindMask)); + XRegister reg = m_reg.AsRiscv64().AsXRegister(); + __ Beqz(reg, Riscv64JNIMacroLabel::Cast(resume)->AsRiscv64()); // Skip test and load for null. + __ Andi(TMP, reg, kGlobalOrWeakGlobalMask); + __ Bnez(TMP, Riscv64JNIMacroLabel::Cast(slow_path)->AsRiscv64()); + __ Andi(reg, reg, ~kIndirectRefKindMask); + __ Loadwu(reg, reg, 0); +} + +void Riscv64JNIMacroAssembler::VerifyObject([[maybe_unused]] ManagedRegister m_src, + [[maybe_unused]] bool could_be_null) { + // TODO: not validating references. +} + +void Riscv64JNIMacroAssembler::VerifyObject([[maybe_unused]] FrameOffset src, + [[maybe_unused]] bool could_be_null) { + // TODO: not validating references. 
+} + +void Riscv64JNIMacroAssembler::Jump(ManagedRegister m_base, Offset offs) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + CHECK(base.IsXRegister()) << base; + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadd(tmp, base.AsXRegister(), offs.Int32Value()); + __ Jr(tmp); +} + +void Riscv64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + CHECK(base.IsXRegister()) << base; + __ Loadd(RA, base.AsXRegister(), offs.Int32Value()); + __ Jalr(RA); +} + + +void Riscv64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset) { + Call(Riscv64ManagedRegister::FromXRegister(TR), offset); +} + +void Riscv64JNIMacroAssembler::TryToTransitionFromRunnableToNative( + JNIMacroLabel* label, + ArrayRef<const ManagedRegister> scratch_regs) { + constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); + constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); + constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kRiscv64PointerSize>(); + constexpr ThreadOffset64 thread_held_mutex_mutator_lock_offset = + Thread::HeldMutexOffset<kRiscv64PointerSize>(kMutatorLock); + + DCHECK_GE(scratch_regs.size(), 2u); + XRegister scratch = scratch_regs[0].AsRiscv64().AsXRegister(); + XRegister scratch2 = scratch_regs[1].AsRiscv64().AsXRegister(); + + // CAS release, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags. + Riscv64Label retry; + __ Bind(&retry); + static_assert(thread_flags_offset.Int32Value() == 0); // LR/SC require exact address. + __ LrW(scratch, TR, AqRl::kNone); + __ Li(scratch2, kNativeStateValue); + // If any flags are set, go to the slow path. + static_assert(kRunnableStateValue == 0u); + __ Bnez(scratch, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + __ ScW(scratch, scratch2, TR, AqRl::kRelease); + __ Bnez(scratch, &retry); + + // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`. + __ Stored(Zero, TR, thread_held_mutex_mutator_lock_offset.Int32Value()); +} + +void Riscv64JNIMacroAssembler::TryToTransitionFromNativeToRunnable( + JNIMacroLabel* label, + ArrayRef<const ManagedRegister> scratch_regs, + ManagedRegister return_reg) { + constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); + constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); + constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kRiscv64PointerSize>(); + constexpr ThreadOffset64 thread_held_mutex_mutator_lock_offset = + Thread::HeldMutexOffset<kRiscv64PointerSize>(kMutatorLock); + constexpr ThreadOffset64 thread_mutator_lock_offset = + Thread::MutatorLockOffset<kRiscv64PointerSize>(); + + DCHECK_GE(scratch_regs.size(), 2u); + DCHECK(!scratch_regs[0].AsRiscv64().Overlaps(return_reg.AsRiscv64())); + XRegister scratch = scratch_regs[0].AsRiscv64().AsXRegister(); + DCHECK(!scratch_regs[1].AsRiscv64().Overlaps(return_reg.AsRiscv64())); + XRegister scratch2 = scratch_regs[1].AsRiscv64().AsXRegister(); + + // CAS acquire, old_value = kNativeStateValue, new_value = kRunnableStateValue, no flags. + Riscv64Label retry; + __ Bind(&retry); + static_assert(thread_flags_offset.Int32Value() == 0); // LR/SC require exact address. + __ LrW(scratch, TR, AqRl::kAcquire); + __ Li(scratch2, kNativeStateValue); + // If any flags are set, or the state is not Native, go to the slow path. 
+ // (While the thread can theoretically transition between different Suspended states, + // it would be very unexpected to see a state other than Native at this point.) + __ Bne(scratch, scratch2, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + static_assert(kRunnableStateValue == 0u); + __ ScW(scratch, Zero, TR, AqRl::kNone); + __ Bnez(scratch, &retry); + + // Set `self->tlsPtr_.held_mutexes[kMutatorLock]` to the mutator lock. + __ Loadd(scratch, TR, thread_mutator_lock_offset.Int32Value()); + __ Stored(scratch, TR, thread_held_mutex_mutator_lock_offset.Int32Value()); +} + +void Riscv64JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) { + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, TR, Thread::ThreadFlagsOffset<kRiscv64PointerSize>().Int32Value()); + DCHECK(IsInt<12>(dchecked_integral_cast<int32_t>(Thread::SuspendOrCheckpointRequestFlags()))); + __ Andi(tmp, tmp, dchecked_integral_cast<int32_t>(Thread::SuspendOrCheckpointRequestFlags())); + __ Bnez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); +} + +void Riscv64JNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) { + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadd(tmp, TR, Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value()); + __ Bnez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); +} + +void Riscv64JNIMacroAssembler::DeliverPendingException() { + // Pass exception object as argument. + // Don't care about preserving A0 as this won't return. + // Note: The scratch register from `ExceptionPoll()` may have been clobbered. + __ Loadd(A0, TR, Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value()); + __ Loadd(RA, TR, QUICK_ENTRYPOINT_OFFSET(kRiscv64PointerSize, pDeliverException).Int32Value()); + __ Jalr(RA); + // Call should never return. + __ Unimp(); +} + +std::unique_ptr<JNIMacroLabel> Riscv64JNIMacroAssembler::CreateLabel() { + return std::unique_ptr<JNIMacroLabel>(new Riscv64JNIMacroLabel()); +} + +void Riscv64JNIMacroAssembler::Jump(JNIMacroLabel* label) { + CHECK(label != nullptr); + __ J(down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); +} + +void Riscv64JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) { + CHECK(label != nullptr); + + DCHECK_EQ(Thread::IsGcMarkingSize(), 4u); + + ScratchRegisterScope srs(&asm_); + XRegister test_reg = srs.AllocateXRegister(); + int32_t is_gc_marking_offset = Thread::IsGcMarkingOffset<kRiscv64PointerSize>().Int32Value(); + __ Loadw(test_reg, TR, is_gc_marking_offset); + switch (cond) { + case JNIMacroUnaryCondition::kZero: + __ Beqz(test_reg, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); + break; + case JNIMacroUnaryCondition::kNotZero: + __ Bnez(test_reg, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); + break; + default: + LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond); + UNREACHABLE(); + } +} + +void Riscv64JNIMacroAssembler::TestMarkBit(ManagedRegister m_ref, + JNIMacroLabel* label, + JNIMacroUnaryCondition cond) { + XRegister ref = m_ref.AsRiscv64().AsXRegister(); + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, ref, mirror::Object::MonitorOffset().Int32Value()); + // Move the bit we want to check to the sign bit, so that we can use BGEZ/BLTZ + // to check it. Extracting the bit for BEQZ/BNEZ would require one more instruction. 
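+ // [Editorial illustration; the concrete shift value is an assumption, the code relies
+ // only on the named constant] If `LockWord::kMarkBitStateShift` is 29, the `Slliw`
+ // below shifts by 31 - 29 = 2, e.g. a monitor word with only the mark bit set,
+ // 0x20000000, becomes 0x80000000 (negative as int32), so BLTZ branches when the mark
+ // bit is set and BGEZ when it is clear, one instruction cheaper than extracting the
+ // bit for BEQZ/BNEZ.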
+ static_assert(LockWord::kMarkBitStateSize == 1u); + __ Slliw(tmp, tmp, 31 - LockWord::kMarkBitStateShift); + switch (cond) { + case JNIMacroUnaryCondition::kZero: + __ Bgez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + break; + case JNIMacroUnaryCondition::kNotZero: + __ Bltz(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + break; + default: + LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond); + UNREACHABLE(); + } +} + +void Riscv64JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) { + int32_t small_offset = dchecked_integral_cast<int32_t>(address & 0xfff) - + dchecked_integral_cast<int32_t>((address & 0x800) << 1); + int64_t remainder = static_cast<int64_t>(address) - small_offset; + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ LoadConst64(tmp, remainder); + __ Lb(tmp, tmp, small_offset); + __ Bnez(tmp, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); +} + +void Riscv64JNIMacroAssembler::Bind(JNIMacroLabel* label) { + CHECK(label != nullptr); + __ Bind(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); +} + +void Riscv64JNIMacroAssembler::CreateJObject(ManagedRegister m_dest, + FrameOffset spilled_reference_offset, + ManagedRegister m_ref, + bool null_allowed) { + Riscv64ManagedRegister dest = m_dest.AsRiscv64(); + Riscv64ManagedRegister ref = m_ref.AsRiscv64(); + DCHECK(dest.IsXRegister()); + DCHECK(ref.IsXRegister()); + + Riscv64Label null_label; + if (null_allowed) { + if (!dest.Equals(ref)) { + __ Li(dest.AsXRegister(), 0); + } + __ Beqz(ref.AsXRegister(), &null_label); + } + __ AddConst64(dest.AsXRegister(), SP, spilled_reference_offset.Int32Value()); + if (null_allowed) { + __ Bind(&null_label); + } +} + +#undef __ + +} // namespace riscv64 +} // namespace art diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64.h b/compiler/utils/riscv64/jni_macro_assembler_riscv64.h new file mode 100644 index 0000000000..3cbed0d53b --- /dev/null +++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64.h @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_RISCV64_JNI_MACRO_ASSEMBLER_RISCV64_H_ +#define ART_COMPILER_UTILS_RISCV64_JNI_MACRO_ASSEMBLER_RISCV64_H_ + +#include <stdint.h> +#include <memory> +#include <vector> + +#include <android-base/logging.h> + +#include "assembler_riscv64.h" +#include "base/arena_containers.h" +#include "base/enums.h" +#include "base/macros.h" +#include "offsets.h" +#include "utils/assembler.h" +#include "utils/jni_macro_assembler.h" + +namespace art HIDDEN { +namespace riscv64 { + +class Riscv64JNIMacroAssembler : public JNIMacroAssemblerFwd<Riscv64Assembler, PointerSize::k64> { + public: + explicit Riscv64JNIMacroAssembler(ArenaAllocator* allocator) + : JNIMacroAssemblerFwd<Riscv64Assembler, PointerSize::k64>(allocator) {} + ~Riscv64JNIMacroAssembler(); + + // Finalize the code. 
+ void FinalizeCode() override; + + // Emit code that will create an activation on the stack. + void BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs) override; + + // Emit code that will remove an activation from the stack. + void RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> callee_save_regs, + bool may_suspend) override; + + void IncreaseFrameSize(size_t adjust) override; + void DecreaseFrameSize(size_t adjust) override; + + ManagedRegister CoreRegisterWithSize(ManagedRegister src, size_t size) override; + + // Store routines. + void Store(FrameOffset offs, ManagedRegister src, size_t size) override; + void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override; + void StoreRawPtr(FrameOffset offs, ManagedRegister src) override; + void StoreStackPointerToThread(ThreadOffset64 offs, bool tag_sp) override; + + // Load routines. + void Load(ManagedRegister dest, FrameOffset offs, size_t size) override; + void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override; + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) override; + void LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs) override; + void LoadStackReference(ManagedRegister dest, FrameOffset offs) override; + + // Copying routines. + void MoveArguments(ArrayRef<ArgumentLocation> dests, + ArrayRef<ArgumentLocation> srcs, + ArrayRef<FrameOffset> refs) override; + void Move(ManagedRegister dest, ManagedRegister src, size_t size) override; + void Move(ManagedRegister dest, size_t value) override; + + // Sign extension. + void SignExtend(ManagedRegister mreg, size_t size) override; + + // Zero extension. + void ZeroExtend(ManagedRegister mreg, size_t size) override; + + // Exploit fast access in managed code to Thread::Current(). + void GetCurrentThread(ManagedRegister dest) override; + void GetCurrentThread(FrameOffset offset) override; + + // Decode JNI transition or local `jobject`. For (weak) global `jobject`, jump to slow path. + void DecodeJNITransitionOrLocalJObject(ManagedRegister reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) override; + + // Heap::VerifyObject on src. In some cases (such as a reference to this) we + // know that src may not be null. + void VerifyObject(ManagedRegister src, bool could_be_null) override; + void VerifyObject(FrameOffset src, bool could_be_null) override; + + // Jump to address held at [base+offset] (used for tail calls). + void Jump(ManagedRegister base, Offset offset) override; + + // Call to address held at [base+offset]. + void Call(ManagedRegister base, Offset offset) override; + void CallFromThread(ThreadOffset64 offset) override; + + // Generate fast-path for transition to Native. Go to `label` if any thread flag is set. + // The implementation can use `scratch_regs` which should be callee save core registers + // (already saved before this call) and must preserve all argument registers. + void TryToTransitionFromRunnableToNative(JNIMacroLabel* label, + ArrayRef<const ManagedRegister> scratch_regs) override; + + // Generate fast-path for transition to Runnable. Go to `label` if any thread flag is set. + // The implementation can use `scratch_regs` which should be core argument registers + // not used as return registers and it must preserve the `return_reg` if any. 
+ void TryToTransitionFromNativeToRunnable(JNIMacroLabel* label, + ArrayRef<const ManagedRegister> scratch_regs, + ManagedRegister return_reg) override; + + // Generate suspend check and branch to `label` if there is a pending suspend request. + void SuspendCheck(JNIMacroLabel* label) override; + + // Generate code to check if Thread::Current()->exception_ is non-null + // and branch to the `label` if it is. + void ExceptionPoll(JNIMacroLabel* label) override; + // Deliver pending exception. + void DeliverPendingException() override; + + // Create a new label that can be used with Jump/Bind calls. + std::unique_ptr<JNIMacroLabel> CreateLabel() override; + // Emit an unconditional jump to the label. + void Jump(JNIMacroLabel* label) override; + // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag. + void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; + // Emit a conditional jump to the label by applying a unary condition test to object's mark bit. + void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; + // Emit a conditional jump to label if the loaded value from specified locations is not zero. + void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override; + // Code at this offset will serve as the target for the Jump call. + void Bind(JNIMacroLabel* label) override; + + private: + void CreateJObject(ManagedRegister m_dest, + FrameOffset spilled_reference_offset, + ManagedRegister m_ref, + bool null_allowed); + + ART_FRIEND_TEST(JniMacroAssemblerRiscv64Test, CreateJObject); +}; + +class Riscv64JNIMacroLabel final + : public JNIMacroLabelCommon<Riscv64JNIMacroLabel, + Riscv64Label, + InstructionSet::kRiscv64> { + public: + Riscv64Label* AsRiscv64() { + return AsPlatformLabel(); + } +}; + +} // namespace riscv64 +} // namespace art + +#endif // ART_COMPILER_UTILS_RISCV64_JNI_MACRO_ASSEMBLER_RISCV64_H_ diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc b/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc new file mode 100644 index 0000000000..be6feeb9de --- /dev/null +++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc @@ -0,0 +1,965 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <dirent.h> +#include <errno.h> +#include <string.h> +#include <sys/types.h> + +#include <fstream> +#include <map> +#include <regex> + +#include "gtest/gtest.h" + +#include "indirect_reference_table.h" +#include "lock_word.h" +#include "jni/quick/calling_convention.h" +#include "utils/riscv64/jni_macro_assembler_riscv64.h" +#include "utils/assembler_test_base.h" + +#include "base/macros.h" +#include "base/malloc_arena_pool.h" + +namespace art HIDDEN { +namespace riscv64 { + +#define __ assembler_. 
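+// [Editorial sketch, hedged; not part of the original change] The tests below follow
+// the usual ART assembler-test pattern: a test emits code through the `__` macro,
+// accumulates the assembly text it expects in `expected`, and passes both to
+// `DriverStr()`, which finalizes the code, copies it out of the assembler buffer and
+// compares it against `expected` as assembled by the external toolchain. A minimal
+// test body, with "MyTest" as a made-up artifact name, would look like:
+//
+//   std::string expected;
+//   __ IncreaseFrameSize(128);            // emit the code under test
+//   expected += "addi sp, sp, -128\n";    // the assembly it should expand to
+//   DriverStr(expected, "MyTest");        // assemble `expected` and compare bytes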
+ +class JniMacroAssemblerRiscv64Test : public AssemblerTestBase { + public: + JniMacroAssemblerRiscv64Test() : pool_(), allocator_(&pool_), assembler_(&allocator_) { } + + protected: + InstructionSet GetIsa() override { return InstructionSet::kRiscv64; } + + void DriverStr(const std::string& assembly_text, const std::string& test_name) { + assembler_.FinalizeCode(); + size_t cs = assembler_.CodeSize(); + std::vector<uint8_t> data(cs); + MemoryRegion code(&data[0], data.size()); + assembler_.CopyInstructions(code); + Driver(data, assembly_text, test_name); + } + + static Riscv64ManagedRegister AsManaged(XRegister reg) { + return Riscv64ManagedRegister::FromXRegister(reg); + } + + static Riscv64ManagedRegister AsManaged(FRegister reg) { + return Riscv64ManagedRegister::FromFRegister(reg); + } + + std::string EmitRet() { + __ RemoveFrame(/*frame_size=*/ 0u, + /*callee_save_regs=*/ ArrayRef<const ManagedRegister>(), + /*may_suspend=*/ false); + return "ret\n"; + } + + static const size_t kWordSize = 4u; + static const size_t kDoubleWordSize = 8u; + + MallocArenaPool pool_; + ArenaAllocator allocator_; + Riscv64JNIMacroAssembler assembler_; +}; + +TEST_F(JniMacroAssemblerRiscv64Test, StackFrame) { + std::string expected; + + std::unique_ptr<JniCallingConvention> jni_conv = JniCallingConvention::Create( + &allocator_, + /*is_static=*/ false, + /*is_synchronized=*/ false, + /*is_fast_native=*/ false, + /*is_critical_native=*/ false, + /*shorty=*/ "V", + InstructionSet::kRiscv64); + size_t frame_size = jni_conv->FrameSize(); + ManagedRegister method_reg = AsManaged(A0); + ArrayRef<const ManagedRegister> callee_save_regs = jni_conv->CalleeSaveRegisters(); + + __ BuildFrame(frame_size, method_reg, callee_save_regs); + expected += "addi sp, sp, -208\n" + "sd ra, 200(sp)\n" + "sd s11, 192(sp)\n" + "sd s10, 184(sp)\n" + "sd s9, 176(sp)\n" + "sd s8, 168(sp)\n" + "sd s7, 160(sp)\n" + "sd s6, 152(sp)\n" + "sd s5, 144(sp)\n" + "sd s4, 136(sp)\n" + "sd s3, 128(sp)\n" + "sd s2, 120(sp)\n" + "sd s0, 112(sp)\n" + "fsd fs11, 104(sp)\n" + "fsd fs10, 96(sp)\n" + "fsd fs9, 88(sp)\n" + "fsd fs8, 80(sp)\n" + "fsd fs7, 72(sp)\n" + "fsd fs6, 64(sp)\n" + "fsd fs5, 56(sp)\n" + "fsd fs4, 48(sp)\n" + "fsd fs3, 40(sp)\n" + "fsd fs2, 32(sp)\n" + "fsd fs1, 24(sp)\n" + "fsd fs0, 16(sp)\n" + "sd a0, 0(sp)\n"; + + __ RemoveFrame(frame_size, callee_save_regs, /*may_suspend=*/ false); + expected += "fld fs0, 16(sp)\n" + "fld fs1, 24(sp)\n" + "fld fs2, 32(sp)\n" + "fld fs3, 40(sp)\n" + "fld fs4, 48(sp)\n" + "fld fs5, 56(sp)\n" + "fld fs6, 64(sp)\n" + "fld fs7, 72(sp)\n" + "fld fs8, 80(sp)\n" + "fld fs9, 88(sp)\n" + "fld fs10, 96(sp)\n" + "fld fs11, 104(sp)\n" + "ld s0, 112(sp)\n" + "ld s2, 120(sp)\n" + "ld s3, 128(sp)\n" + "ld s4, 136(sp)\n" + "ld s5, 144(sp)\n" + "ld s6, 152(sp)\n" + "ld s7, 160(sp)\n" + "ld s8, 168(sp)\n" + "ld s9, 176(sp)\n" + "ld s10, 184(sp)\n" + "ld s11, 192(sp)\n" + "ld ra, 200(sp)\n" + "addi sp, sp, 208\n" + "ret\n"; + + DriverStr(expected, "StackFrame"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, ChangeFrameSize) { + std::string expected; + + __ IncreaseFrameSize(128); + expected += "addi sp, sp, -128\n"; + __ DecreaseFrameSize(128); + expected += "addi sp, sp, 128\n"; + + __ IncreaseFrameSize(0); // No-op + __ DecreaseFrameSize(0); // No-op + + __ IncreaseFrameSize(2048); + expected += "addi sp, sp, -2048\n"; + __ DecreaseFrameSize(2048); + expected += "addi t6, sp, 2047\n" + "addi sp, t6, 1\n"; + + __ IncreaseFrameSize(4096); + expected += "addi t6, sp, -2048\n" + "addi sp, t6, -2048\n"; + __ 
DecreaseFrameSize(4096); + expected += "lui t6, 1\n" + "add sp, sp, t6\n"; + + __ IncreaseFrameSize(6 * KB); + expected += "addi t6, zero, -3\n" + "slli t6, t6, 11\n" + "add sp, sp, t6\n"; + __ DecreaseFrameSize(6 * KB); + expected += "addi t6, zero, 3\n" + "slli t6, t6, 11\n" + "add sp, sp, t6\n"; + + __ IncreaseFrameSize(6 * KB + 16); + expected += "lui t6, 0xffffe\n" + "addiw t6, t6, 2048-16\n" + "add sp, sp, t6\n"; + __ DecreaseFrameSize(6 * KB + 16); + expected += "lui t6, 2\n" + "addiw t6, t6, 16-2048\n" + "add sp, sp, t6\n"; + + DriverStr(expected, "ChangeFrameSize"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Store) { + std::string expected; + + __ Store(FrameOffset(0), AsManaged(A0), kWordSize); + expected += "sw a0, 0(sp)\n"; + __ Store(FrameOffset(2048), AsManaged(S0), kDoubleWordSize); + expected += "addi t6, sp, 0x7f8\n" + "sd s0, 8(t6)\n"; + + __ Store(AsManaged(A1), MemberOffset(256), AsManaged(S2), kDoubleWordSize); + expected += "sd s2, 256(a1)\n"; + __ Store(AsManaged(S3), MemberOffset(4 * KB), AsManaged(T1), kWordSize); + expected += "lui t6, 1\n" + "add t6, t6, s3\n" + "sw t1, 0(t6)\n"; + + __ Store(AsManaged(A3), MemberOffset(384), AsManaged(FA5), kDoubleWordSize); + expected += "fsd fa5, 384(a3)\n"; + __ Store(AsManaged(S4), MemberOffset(4 * KB + 16), AsManaged(FT10), kWordSize); + expected += "lui t6, 1\n" + "add t6, t6, s4\n" + "fsw ft10, 16(t6)\n"; + + __ StoreRawPtr(FrameOffset(128), AsManaged(A7)); + expected += "sd a7, 128(sp)\n"; + __ StoreRawPtr(FrameOffset(6 * KB), AsManaged(S11)); + expected += "lui t6, 2\n" + "add t6, t6, sp\n" + "sd s11, -2048(t6)\n"; + + __ StoreStackPointerToThread(ThreadOffset64(512), /*tag_sp=*/ false); + expected += "sd sp, 512(s1)\n"; + __ StoreStackPointerToThread(ThreadOffset64(3 * KB), /*tag_sp=*/ true); + expected += "ori t6, sp, 0x2\n" + "addi t5, s1, 0x7f8\n" + "sd t6, 0x408(t5)\n"; + + DriverStr(expected, "Store"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Load) { + std::string expected; + + __ Load(AsManaged(A0), FrameOffset(0), kWordSize); + expected += "lw a0, 0(sp)\n"; + __ Load(AsManaged(S0), FrameOffset(2048), kDoubleWordSize); + expected += "addi t6, sp, 0x7f8\n" + "ld s0, 8(t6)\n"; + + __ Load(AsManaged(S2), AsManaged(A1), MemberOffset(256), kDoubleWordSize); + expected += "ld s2, 256(a1)\n"; + __ Load(AsManaged(T1), AsManaged(S3), MemberOffset(4 * KB), kWordSize); + expected += "lui t6, 1\n" + "add t6, t6, s3\n" + "lw t1, 0(t6)\n"; + + __ Load(AsManaged(FA5), AsManaged(A3), MemberOffset(384), kDoubleWordSize); + expected += "fld fa5, 384(a3)\n"; + __ Load(AsManaged(FT10), AsManaged(S4), MemberOffset(4 * KB + 16), kWordSize); + expected += "lui t6, 1\n" + "add t6, t6, s4\n" + "flw ft10, 16(t6)\n"; + + __ LoadRawPtrFromThread(AsManaged(A7), ThreadOffset64(512)); + expected += "ld a7, 512(s1)\n"; + __ LoadRawPtrFromThread(AsManaged(S11), ThreadOffset64(3 * KB)); + expected += "addi t6, s1, 0x7f8\n" + "ld s11, 0x408(t6)\n"; + + __ LoadGcRootWithoutReadBarrier(AsManaged(T0), AsManaged(A0), MemberOffset(0)); + expected += "lwu t0, 0(a0)\n"; + __ LoadGcRootWithoutReadBarrier(AsManaged(T1), AsManaged(S2), MemberOffset(0x800)); + expected += "addi t6, s2, 0x7f8\n" + "lwu t1, 8(t6)\n"; + + __ LoadStackReference(AsManaged(T0), FrameOffset(0)); + expected += "lwu t0, 0(sp)\n"; + __ LoadStackReference(AsManaged(T1), FrameOffset(0x800)); + expected += "addi t6, sp, 0x7f8\n" + "lwu t1, 8(t6)\n"; + + DriverStr(expected, "Load"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, CreateJObject) { + std::string expected; + + __ 
CreateJObject(AsManaged(A0), FrameOffset(8), AsManaged(A0), /*null_allowed=*/ true);
+ expected += "beqz a0, 1f\n" + "addi a0, sp, 8\n" + "1:\n";
+ __ CreateJObject(AsManaged(A1), FrameOffset(12), AsManaged(A1), /*null_allowed=*/ false);
+ expected += "addi a1, sp, 12\n";
+ __ CreateJObject(AsManaged(A2), FrameOffset(16), AsManaged(A3), /*null_allowed=*/ true);
+ expected += "li a2, 0\n" + "beqz a3, 2f\n" + "addi a2, sp, 16\n" + "2:\n";
+ __ CreateJObject(AsManaged(A4), FrameOffset(2048), AsManaged(A5), /*null_allowed=*/ false);
+ expected += "addi t6, sp, 2047\n" + "addi a4, t6, 1\n";
+
+ DriverStr(expected, "CreateJObject");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, MoveArguments) {
+ std::string expected;
+
+ static constexpr FrameOffset kInvalidReferenceOffset =
+ JNIMacroAssembler<kRiscv64PointerSize>::kInvalidReferenceOffset;
+ static constexpr size_t kNativePointerSize = static_cast<size_t>(kRiscv64PointerSize);
+ static constexpr size_t kFloatSize = 4u;
+ static constexpr size_t kXlenInBytes = 8u; // Used for integral args and `double`.
+
+ // Normal or @FastNative static with parameters "LIJIJILJI".
+ // Note: This shall not spill references to the stack. The JNI compiler spills
+ // references in a separate initial pass before moving arguments and creating `jobject`s.
+ ArgumentLocation move_dests1[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kNativePointerSize), // `jclass`
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kNativePointerSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes),
+ ArgumentLocation(FrameOffset(0), kNativePointerSize),
+ ArgumentLocation(FrameOffset(8), kXlenInBytes),
+ ArgumentLocation(FrameOffset(16), kXlenInBytes),
+ };
+ ArgumentLocation move_srcs1[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kNativePointerSize), // `jclass`
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize),
+ ArgumentLocation(FrameOffset(76), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(84), kVRegSize),
+ };
+ FrameOffset move_refs1[] {
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(40),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(72),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ };
+ __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests1),
+ ArrayRef<ArgumentLocation>(move_srcs1),
+ ArrayRef<FrameOffset>(move_refs1));
+ expected += "beqz a7, 1f\n" + "addi a7, sp, 72\n" + "1:\n" + "sd a7, 0(sp)\n" + "ld t6, 76(sp)\n" + "sd t6, 8(sp)\n" + "lw t6, 84(sp)\n" + "sd t6, 16(sp)\n" + "mv a7, a6\n" + "mv a6, a5\n" + "mv a5, a4\n" + "mv a4, a3\n" + "mv a3, a2\n" + "li a2, 0\n" + "beqz a1, 2f\n" + "add a2, sp, 40\n" + "2:\n" + "mv a1, a0\n";
+
+ // Normal or @FastNative static with parameters "LIJIJILJI" - spill references.
+ ArgumentLocation move_dests1_spill_refs[] = {
+ ArgumentLocation(FrameOffset(40), kVRegSize),
+ ArgumentLocation(FrameOffset(72), kVRegSize),
+ };
+ ArgumentLocation move_srcs1_spill_refs[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize),
+ };
+ FrameOffset move_refs1_spill_refs[] {
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ };
+ __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests1_spill_refs),
+ ArrayRef<ArgumentLocation>(move_srcs1_spill_refs),
+ ArrayRef<FrameOffset>(move_refs1_spill_refs));
+ expected += "sw a1, 40(sp)\n" + "sw a7, 72(sp)\n";
+
+ // Normal or @FastNative with parameters "LLIJIJIJLI" (first is `this`).
+ // Note: This shall not spill references to the stack. The JNI compiler spills
+ // references in a separate initial pass before moving arguments and creating `jobject`s.
+ ArgumentLocation move_dests2[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kNativePointerSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kNativePointerSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes),
+ ArgumentLocation(FrameOffset(0), kXlenInBytes),
+ ArgumentLocation(FrameOffset(8), kNativePointerSize),
+ ArgumentLocation(FrameOffset(16), kXlenInBytes),
+ };
+ ArgumentLocation move_srcs2[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize),
+ ArgumentLocation(FrameOffset(76), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(84), kVRegSize),
+ ArgumentLocation(FrameOffset(88), kVRegSize),
+ };
+ FrameOffset move_refs2[] {
+ FrameOffset(40),
+ FrameOffset(44),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(84),
+ FrameOffset(kInvalidReferenceOffset),
+ };
+ __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests2),
+ ArrayRef<ArgumentLocation>(move_srcs2),
+ ArrayRef<FrameOffset>(move_refs2));
+ // Args in A1-A7 do not move but references are converted to `jobject`.
+ expected += "addi a1, sp, 40\n" + "beqz a2, 1f\n" + "addi a2, sp, 44\n" + "1:\n" + "ld t6, 76(sp)\n" + "sd t6, 0(sp)\n" + "lwu t6, 84(sp)\n" + "beqz t6, 2f\n" + "addi t6, sp, 84\n" + "2:\n" + "sd t6, 8(sp)\n" + "lw t6, 88(sp)\n" + "sd t6, 16(sp)\n";
+
+ // Normal or @FastNative static with parameters "FDFDFDFDFDIJIJIJL".
+ ArgumentLocation move_dests3[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kNativePointerSize), // `jclass` + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes), + ArgumentLocation(FrameOffset(0), kXlenInBytes), + ArgumentLocation(FrameOffset(8), kXlenInBytes), + ArgumentLocation(FrameOffset(16), kNativePointerSize), + }; + ArgumentLocation move_srcs3[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kNativePointerSize), // `jclass` + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), 2 * kVRegSize), + ArgumentLocation(FrameOffset(88), kVRegSize), + ArgumentLocation(FrameOffset(92), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize), + }; + FrameOffset move_refs3[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(88), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests3), + 
ArrayRef<ArgumentLocation>(move_srcs3), + ArrayRef<FrameOffset>(move_refs3)); + // FP args in FA0-FA7 do not move. + expected += "sd a5, 0(sp)\n" + "sd a6, 8(sp)\n" + "beqz a7, 1f\n" + "addi a7, sp, 88\n" + "1:\n" + "sd a7, 16(sp)\n" + "mv a5, a2\n" + "mv a6, a3\n" + "mv a7, a4\n" + "lw a2, 88(sp)\n" + "ld a3, 92(sp)\n" + "mv a4, a1\n" + "mv a1, a0\n"; + + // @CriticalNative with parameters "DFDFDFDFIDJIJFDIIJ". + ArgumentLocation move_dests4[] = { + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes), + ArgumentLocation(FrameOffset(0), kXlenInBytes), + ArgumentLocation(FrameOffset(8), kXlenInBytes), + }; + ArgumentLocation move_srcs4[] = { + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize), + ArgumentLocation(FrameOffset(92), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), 2 * kVRegSize), + ArgumentLocation(FrameOffset(112), kVRegSize), + ArgumentLocation(FrameOffset(116), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), 2 * kVRegSize), + }; + FrameOffset move_refs4[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + 
FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests4), + ArrayRef<ArgumentLocation>(move_srcs4), + ArrayRef<FrameOffset>(move_refs4)); + // FP args in FA0-FA7 and integral args in A2-A4 do not move. + expected += "sd a6, 0(sp)\n" + "sd a7, 8(sp)\n" + "mv a0, a1\n" + "ld a1, 92(sp)\n" + "ld a6, 116(sp)\n" + "mv a7, a5\n" + "lw a5, 112(sp)\n"; + + // @CriticalNative with parameters "JIJIJIJIJI". + ArgumentLocation move_dests5[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes), + ArgumentLocation(FrameOffset(0), kXlenInBytes), + ArgumentLocation(FrameOffset(8), kXlenInBytes), + }; + ArgumentLocation move_srcs5[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), 2 * kVRegSize), + ArgumentLocation(FrameOffset(84), kVRegSize), + ArgumentLocation(FrameOffset(88), 2 * kVRegSize), + ArgumentLocation(FrameOffset(96), kVRegSize), + }; + FrameOffset move_refs5[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests5), + ArrayRef<ArgumentLocation>(move_srcs5), + ArrayRef<FrameOffset>(move_refs5)); + expected += "ld t6, 88(sp)\n" + "sd t6, 0(sp)\n" + "lw t6, 96(sp)\n" + "sd t6, 8(sp)\n" + "mv a0, a1\n" + "mv a1, a2\n" + "mv a2, a3\n" + "mv a3, a4\n" + "mv a4, a5\n" + "mv a5, a6\n" + "mv a6, a7\n" + "lw a7, 84(sp)\n"; + + DriverStr(expected, "MoveArguments"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Move) { + std::string expected; + + __ Move(AsManaged(A0), AsManaged(A1), kWordSize); + expected += "mv a0, a1\n"; + __ Move(AsManaged(A2), AsManaged(A3), kDoubleWordSize); + expected += "mv a2, a3\n"; + + __ Move(AsManaged(A4), AsManaged(A4), kWordSize); // No-op. + __ Move(AsManaged(A5), AsManaged(A5), kDoubleWordSize); // No-op. 
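+ // [Editorial note] The two calls above intentionally add nothing to `expected`:
+ // `Riscv64JNIMacroAssembler::Move()` emits `mv` only under `if (!dest.Equals(src))`
+ // (see jni_macro_assembler_riscv64.cc above), so a same-register move produces no code.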
+ + DriverStr(expected, "Move"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, GetCurrentThread) { + std::string expected; + + __ GetCurrentThread(AsManaged(A0)); + expected += "mv a0, s1\n"; + + __ GetCurrentThread(FrameOffset(256)); + expected += "sd s1, 256(sp)\n"; + __ GetCurrentThread(FrameOffset(3 * KB)); + expected += "addi t6, sp, 0x7f8\n" + "sd s1, 0x408(t6)\n"; + + DriverStr(expected, "GetCurrentThread"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, DecodeJNITransitionOrLocalJObject) { + std::string expected; + + constexpr int64_t kGlobalOrWeakGlobalMask = IndirectReferenceTable::GetGlobalOrWeakGlobalMask(); + constexpr int64_t kIndirectRefKindMask = IndirectReferenceTable::GetIndirectRefKindMask(); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + __ DecodeJNITransitionOrLocalJObject(AsManaged(A0), slow_path.get(), resume.get()); + expected += "beqz a0, 1f\n" + "andi t6, a0, " + std::to_string(kGlobalOrWeakGlobalMask) + "\n" + "bnez t6, 2f\n" + "andi a0, a0, ~" + std::to_string(kIndirectRefKindMask) + "\n" + "lwu a0, (a0)\n"; + + __ Bind(resume.get()); + expected += "1:\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "2:\n"; + + __ Jump(resume.get()); + expected += "j 1b\n"; + + DriverStr(expected, "DecodeJNITransitionOrLocalJObject"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, JumpCodePointer) { + std::string expected; + + __ Jump(AsManaged(A0), Offset(24)); + expected += "ld t6, 24(a0)\n" + "jr t6\n"; + + __ Jump(AsManaged(S2), Offset(2048)); + expected += "addi t6, s2, 0x7f8\n" + "ld t6, 8(t6)\n" + "jr t6\n"; + + DriverStr(expected, "JumpCodePointer"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Call) { + std::string expected; + + __ Call(AsManaged(A0), Offset(32)); + expected += "ld ra, 32(a0)\n" + "jalr ra\n"; + + __ Call(AsManaged(S2), Offset(2048)); + expected += "addi t6, s2, 0x7f8\n" + "ld ra, 8(t6)\n" + "jalr ra\n"; + + __ CallFromThread(ThreadOffset64(256)); + expected += "ld ra, 256(s1)\n" + "jalr ra\n"; + + __ CallFromThread(ThreadOffset64(3 * KB)); + expected += "addi t6, s1, 0x7f8\n" + "ld ra, 0x408(t6)\n" + "jalr ra\n"; + + DriverStr(expected, "Call"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Transitions) { + std::string expected; + + constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); + constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); + static_assert(kRunnableStateValue == 0u); + constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kRiscv64PointerSize>(); + static_assert(thread_flags_offset.SizeValue() == 0u); + constexpr size_t thread_held_mutex_mutator_lock_offset = + Thread::HeldMutexOffset<kRiscv64PointerSize>(kMutatorLock).SizeValue(); + constexpr size_t thread_mutator_lock_offset = + Thread::MutatorLockOffset<kRiscv64PointerSize>().SizeValue(); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + const ManagedRegister raw_scratch_regs[] = { AsManaged(T0), AsManaged(T1) }; + const ArrayRef<const ManagedRegister> scratch_regs(raw_scratch_regs); + + __ TryToTransitionFromRunnableToNative(slow_path.get(), scratch_regs); + expected += "1:\n" + "lr.w t0, (s1)\n" + "li t1, " + std::to_string(kNativeStateValue) + "\n" + "bnez t0, 4f\n" + "sc.w.rl t0, t1, (s1)\n" + "bnez t0, 1b\n" + "addi t6, s1, 0x7f8\n" + "sd x0, " + std::to_string(thread_held_mutex_mutator_lock_offset - 0x7f8u) + 
"(t6)\n"; + + __ TryToTransitionFromNativeToRunnable(slow_path.get(), scratch_regs, AsManaged(A0)); + expected += "2:\n" + "lr.w.aq t0, (s1)\n" + "li t1, " + std::to_string(kNativeStateValue) + "\n" + "bne t0, t1, 4f\n" + "sc.w t0, x0, (s1)\n" + "bnez t0, 2b\n" + "ld t0, " + std::to_string(thread_mutator_lock_offset) + "(s1)\n" + "addi t6, s1, 0x7f8\n" + "sd t0, " + std::to_string(thread_held_mutex_mutator_lock_offset - 0x7f8u) + "(t6)\n"; + + __ Bind(resume.get()); + expected += "3:\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "4:\n"; + + __ Jump(resume.get()); + expected += "j 3b"; + + DriverStr(expected, "SuspendCheck"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, SuspendCheck) { + std::string expected; + + ThreadOffset64 thread_flags_offet = Thread::ThreadFlagsOffset<kRiscv64PointerSize>(); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + __ SuspendCheck(slow_path.get()); + expected += "lw t6, " + std::to_string(thread_flags_offet.Int32Value()) + "(s1)\n" + "andi t6, t6, " + std::to_string(Thread::SuspendOrCheckpointRequestFlags()) + "\n" + "bnez t6, 2f\n"; + + __ Bind(resume.get()); + expected += "1:\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "2:\n"; + + __ Jump(resume.get()); + expected += "j 1b"; + + DriverStr(expected, "SuspendCheck"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Exception) { + std::string expected; + + ThreadOffset64 exception_offset = Thread::ExceptionOffset<kArm64PointerSize>(); + ThreadOffset64 deliver_offset = QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pDeliverException); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + + __ ExceptionPoll(slow_path.get()); + expected += "ld t6, " + std::to_string(exception_offset.Int32Value()) + "(s1)\n" + "bnez t6, 1f\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "1:\n"; + + __ DeliverPendingException(); + expected += "ld a0, " + std::to_string(exception_offset.Int32Value()) + "(s1)\n" + "ld ra, " + std::to_string(deliver_offset.Int32Value()) + "(s1)\n" + "jalr ra\n" + "unimp\n"; + + DriverStr(expected, "Exception"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, JumpLabel) { + std::string expected; + + std::unique_ptr<JNIMacroLabel> target = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> back = __ CreateLabel(); + + __ Jump(target.get()); + expected += "j 2f\n"; + + __ Bind(back.get()); + expected += "1:\n"; + + __ Move(AsManaged(A0), AsManaged(A1), static_cast<size_t>(kRiscv64PointerSize)); + expected += "mv a0, a1\n"; + + __ Bind(target.get()); + expected += "2:\n"; + + __ Jump(back.get()); + expected += "j 1b\n"; + + DriverStr(expected, "JumpLabel"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, ReadBarrier) { + std::string expected; + + ThreadOffset64 is_gc_marking_offset = Thread::IsGcMarkingOffset<kRiscv64PointerSize>(); + MemberOffset monitor_offset = mirror::Object::MonitorOffset(); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + __ TestGcMarking(slow_path.get(), JNIMacroUnaryCondition::kNotZero); + expected += "lw t6, " + std::to_string(is_gc_marking_offset.Int32Value()) + "(s1)\n" + "bnez t6, 2f\n"; + + __ TestGcMarking(slow_path.get(), JNIMacroUnaryCondition::kZero); + expected += "lw t6, " + std::to_string(is_gc_marking_offset.Int32Value()) + "(s1)\n" + "beqz t6, 2f\n"; + + __ Bind(resume.get()); + expected += "1:\n"; + + expected += EmitRet(); + + __ 
Bind(slow_path.get()); + expected += "2:\n"; + + __ TestMarkBit(AsManaged(A0), resume.get(), JNIMacroUnaryCondition::kNotZero); + expected += "lw t6, " + std::to_string(monitor_offset.Int32Value()) + "(a0)\n" + "slliw t6, t6, " + std::to_string(31 - LockWord::kMarkBitStateShift) + "\n" + "bltz t6, 1b\n"; + + __ TestMarkBit(AsManaged(T0), resume.get(), JNIMacroUnaryCondition::kZero); + expected += "lw t6, " + std::to_string(monitor_offset.Int32Value()) + "(t0)\n" + "slliw t6, t6, " + std::to_string(31 - LockWord::kMarkBitStateShift) + "\n" + "bgez t6, 1b\n"; + + DriverStr(expected, "ReadBarrier"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, TestByteAndJumpIfNotZero) { + // Note: The `TestByteAndJumpIfNotZero()` takes the address as a `uintptr_t`. + // Use 32-bit addresses, so that we can include this test in 32-bit host tests. + + std::string expected; + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + __ TestByteAndJumpIfNotZero(0x12345678u, slow_path.get()); + expected += "lui t6, 0x12345\n" + "lb t6, 0x678(t6)\n" + "bnez t6, 2f\n"; + + __ TestByteAndJumpIfNotZero(0x87654321u, slow_path.get()); + expected += "lui t6, 0x87654/4\n" + "slli t6, t6, 2\n" + "lb t6, 0x321(t6)\n" + "bnez t6, 2f\n"; + + __ Bind(resume.get()); + expected += "1:\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "2:\n"; + + __ TestByteAndJumpIfNotZero(0x456789abu, resume.get()); + expected += "lui t6, 0x45678+1\n" + "lb t6, 0x9ab-0x1000(t6)\n" + "bnez t6, 1b\n"; + + DriverStr(expected, "TestByteAndJumpIfNotZero"); +} + +#undef __ + +} // namespace riscv64 +} // namespace art diff --git a/compiler/utils/riscv64/managed_register_riscv64.cc b/compiler/utils/riscv64/managed_register_riscv64.cc index 560019ae09..99bd4be784 100644 --- a/compiler/utils/riscv64/managed_register_riscv64.cc +++ b/compiler/utils/riscv64/managed_register_riscv64.cc @@ -18,7 +18,7 @@ #include "base/globals.h" -namespace art { +namespace art HIDDEN { namespace riscv64 { bool Riscv64ManagedRegister::Overlaps(const Riscv64ManagedRegister& other) const { diff --git a/compiler/utils/riscv64/managed_register_riscv64.h b/compiler/utils/riscv64/managed_register_riscv64.h index 8e02a9dcc8..622d766945 100644 --- a/compiler/utils/riscv64/managed_register_riscv64.h +++ b/compiler/utils/riscv64/managed_register_riscv64.h @@ -24,7 +24,7 @@ #include "base/macros.h" #include "utils/managed_register.h" -namespace art { +namespace art HIDDEN { namespace riscv64 { const int kNumberOfXRegIds = kNumberOfXRegisters; diff --git a/compiler/utils/riscv64/managed_register_riscv64_test.cc b/compiler/utils/riscv64/managed_register_riscv64_test.cc index c6ad2dc38a..d7012a796a 100644 --- a/compiler/utils/riscv64/managed_register_riscv64_test.cc +++ b/compiler/utils/riscv64/managed_register_riscv64_test.cc @@ -19,7 +19,7 @@ #include "base/globals.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { namespace riscv64 { TEST(Riscv64ManagedRegister, NoRegister) { diff --git a/compiler/utils/stack_checks.h b/compiler/utils/stack_checks.h index d0fff73df3..1be4532f3e 100644 --- a/compiler/utils/stack_checks.h +++ b/compiler/utils/stack_checks.h @@ -35,7 +35,7 @@ static constexpr size_t kSmallFrameSize = 1 * KB; // stack overflow check on method entry. // // A frame is considered large when it's above kLargeFrameSize. 
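// Note that the comparison below is `>=`, so a frame of exactly kLargeFrameSize bytes also // takes the explicit check; smaller frames are assumed to be covered by the reserved stack // overflow gap. Hypothetical caller sketch (names illustrative, not from this patch): //   if (FrameNeedsStackCheck(frame_size, isa)) { /* emit explicit stack test on entry */ }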
-static inline bool FrameNeedsStackCheck(size_t size, InstructionSet isa ATTRIBUTE_UNUSED) { +static inline bool FrameNeedsStackCheck(size_t size, [[maybe_unused]] InstructionSet isa) { return size >= kLargeFrameSize; } diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index a6b90114b2..955574e76f 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -3213,6 +3213,14 @@ void X86Assembler::addw(const Address& address, const Immediate& imm) { } +void X86Assembler::addw(Register reg, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + CHECK(imm.is_uint16() || imm.is_int16()) << imm.value(); + EmitUint8(0x66); + EmitComplex(0, Operand(reg), imm, /* is_16_op= */ true); +} + + void X86Assembler::adcl(Register reg, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitComplex(2, Operand(reg), imm); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 0f7854dc5c..737e9853a5 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -789,6 +789,7 @@ class X86Assembler final : public Assembler { void addl(const Address& address, Register reg); void addl(const Address& address, const Immediate& imm); void addw(const Address& address, const Immediate& imm); + void addw(Register reg, const Immediate& imm); void adcl(Register dst, Register src); void adcl(Register reg, const Immediate& imm); @@ -955,6 +956,12 @@ class X86Assembler final : public Assembler { lock()->xaddl(address, reg); } + void rdtsc() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x31); + } + // // Misc. functionality // diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 5da6f04402..e7f8d593b4 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -89,19 +89,7 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, addresses_.push_back(x86::Address(x86::ESP, 987654321)); } - if (registers_.size() == 0) { - registers_.insert(end(registers_), - { - new x86::Register(x86::EAX), - new x86::Register(x86::EBX), - new x86::Register(x86::ECX), - new x86::Register(x86::EDX), - new x86::Register(x86::EBP), - new x86::Register(x86::ESP), - new x86::Register(x86::ESI), - new x86::Register(x86::EDI) - }); - + if (secondary_register_names_.empty()) { secondary_register_names_.emplace(x86::Register(x86::EAX), "ax"); secondary_register_names_.emplace(x86::Register(x86::EBX), "bx"); secondary_register_names_.emplace(x86::Register(x86::ECX), "cx"); @@ -121,38 +109,28 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, tertiary_register_names_.emplace(x86::Register(x86::ESI), "dh"); tertiary_register_names_.emplace(x86::Register(x86::EDI), "bh"); } - - if (fp_registers_.size() == 0) { - fp_registers_.insert(end(fp_registers_), - { - new x86::XmmRegister(x86::XMM0), - new x86::XmmRegister(x86::XMM1), - new x86::XmmRegister(x86::XMM2), - new x86::XmmRegister(x86::XMM3), - new x86::XmmRegister(x86::XMM4), - new x86::XmmRegister(x86::XMM5), - new x86::XmmRegister(x86::XMM6), - new x86::XmmRegister(x86::XMM7) - }); - } } void TearDown() override { AssemblerTest::TearDown(); - STLDeleteElements(®isters_); - STLDeleteElements(&fp_registers_); } std::vector<x86::Address> GetAddresses() override { return addresses_; } - std::vector<x86::Register*> GetRegisters() override { - return registers_; + 
ArrayRef<const x86::Register> GetRegisters() override { + static constexpr x86::Register kRegisters[] = { + x86::EAX, x86::EBX, x86::ECX, x86::EDX, x86::EBP, x86::ESP, x86::ESI, x86::EDI + }; + return ArrayRef<const x86::Register>(kRegisters); } - std::vector<x86::XmmRegister*> GetFPRegisters() override { - return fp_registers_; + ArrayRef<const x86::XmmRegister> GetFPRegisters() override { + static constexpr x86::XmmRegister kFPRegisters[] = { + x86::XMM0, x86::XMM1, x86::XMM2, x86::XMM3, x86::XMM4, x86::XMM5, x86::XMM6, x86::XMM7 + }; + return ArrayRef<const x86::XmmRegister>(kFPRegisters); } x86::Immediate CreateImmediate(int64_t imm_value) override { @@ -173,10 +151,8 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, private: std::vector<x86::Address> addresses_; - std::vector<x86::Register*> registers_; std::map<x86::Register, std::string, X86RegisterCompare> secondary_register_names_; std::map<x86::Register, std::string, X86RegisterCompare> tertiary_register_names_; - std::vector<x86::XmmRegister*> fp_registers_; }; class AssemblerX86AVXTest : public AssemblerX86Test { @@ -267,28 +243,28 @@ TEST_F(AssemblerX86Test, RepeatAF) { TEST_F(AssemblerX86Test, PoplAllAddresses) { // Make sure all addressing modes combinations are tested at least once. std::vector<x86::Address> all_addresses; - for (x86::Register* base : GetRegisters()) { + for (x86::Register base : GetRegisters()) { // Base only. - all_addresses.push_back(x86::Address(*base, -1)); - all_addresses.push_back(x86::Address(*base, 0)); - all_addresses.push_back(x86::Address(*base, 1)); - all_addresses.push_back(x86::Address(*base, 123456789)); - for (x86::Register* index : GetRegisters()) { - if (*index == x86::ESP) { + all_addresses.push_back(x86::Address(base, -1)); + all_addresses.push_back(x86::Address(base, 0)); + all_addresses.push_back(x86::Address(base, 1)); + all_addresses.push_back(x86::Address(base, 123456789)); + for (x86::Register index : GetRegisters()) { + if (index == x86::ESP) { // Index cannot be ESP. continue; - } else if (*base == *index) { + } else if (base == index) { // Index only. - all_addresses.push_back(x86::Address(*index, TIMES_1, -1)); - all_addresses.push_back(x86::Address(*index, TIMES_2, 0)); - all_addresses.push_back(x86::Address(*index, TIMES_4, 1)); - all_addresses.push_back(x86::Address(*index, TIMES_8, 123456789)); + all_addresses.push_back(x86::Address(index, TIMES_1, -1)); + all_addresses.push_back(x86::Address(index, TIMES_2, 0)); + all_addresses.push_back(x86::Address(index, TIMES_4, 1)); + all_addresses.push_back(x86::Address(index, TIMES_8, 123456789)); } // Base and index. 
- all_addresses.push_back(x86::Address(*base, *index, TIMES_1, -1)); - all_addresses.push_back(x86::Address(*base, *index, TIMES_2, 0)); - all_addresses.push_back(x86::Address(*base, *index, TIMES_4, 1)); - all_addresses.push_back(x86::Address(*base, *index, TIMES_8, 123456789)); + all_addresses.push_back(x86::Address(base, index, TIMES_1, -1)); + all_addresses.push_back(x86::Address(base, index, TIMES_2, 0)); + all_addresses.push_back(x86::Address(base, index, TIMES_4, 1)); + all_addresses.push_back(x86::Address(base, index, TIMES_8, 123456789)); } } DriverStr(RepeatA(&x86::X86Assembler::popl, all_addresses, "popl {mem}"), "popl"); @@ -302,10 +278,14 @@ TEST_F(AssemblerX86Test, MovlLoad) { DriverStr(RepeatRA(&x86::X86Assembler::movl, "movl {mem}, %{reg}"), "movl-load"); } -TEST_F(AssemblerX86Test, Addw) { +TEST_F(AssemblerX86Test, AddwMem) { DriverStr(RepeatAI(&x86::X86Assembler::addw, /*imm_bytes*/ 2U, "addw ${imm}, {mem}"), "addw"); } +TEST_F(AssemblerX86Test, AddwImm) { + DriverStr(RepeatrI(&x86::X86Assembler::addw, /*imm_bytes*/ 2U, "addw ${imm}, %{reg}"), "addw"); +} + TEST_F(AssemblerX86Test, Andw) { DriverStr(RepeatAI(&x86::X86Assembler::andw, /*imm_bytes*/ 2U, "andw ${imm}, {mem}"), "andw"); } @@ -510,11 +490,11 @@ TEST_F(AssemblerX86Test, PopcntlAddress) { // Rorl only allows CL as the shift count. std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) { std::ostringstream str; - std::vector<x86::Register*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86::Register> registers = assembler_test->GetRegisters(); x86::Register shifter(x86::ECX); - for (auto reg : registers) { - assembler->rorl(*reg, shifter); - str << "rorl %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->rorl(reg, shifter); + str << "rorl %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -530,11 +510,11 @@ TEST_F(AssemblerX86Test, RorlImm) { // Roll only allows CL as the shift count. std::string roll_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) { std::ostringstream str; - std::vector<x86::Register*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86::Register> registers = assembler_test->GetRegisters(); x86::Register shifter(x86::ECX); - for (auto reg : registers) { - assembler->roll(*reg, shifter); - str << "roll %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->roll(reg, shifter); + str << "roll %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -1379,27 +1359,27 @@ TEST_F(AssemblerX86Test, AddressDisplaceBy) { for (int32_t disp0 : displacements) { // initial displacement for (int32_t disp : displacements) { // extra displacement - for (const x86::Register *reg : GetRegisters()) { + for (x86::Register reg : GetRegisters()) { // Test non-SIB addressing. - EXPECT_EQ(x86::Address::displace(x86::Address(*reg, disp0), disp), - x86::Address(*reg, disp0 + disp)); + EXPECT_EQ(x86::Address::displace(x86::Address(reg, disp0), disp), + x86::Address(reg, disp0 + disp)); // Test SIB addressing with EBP base. 
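// (The index+scale constructor exercised below emits a SIB byte with base == EBP and // mod == 00, which the CPU decodes as "disp32, no base register"; that is why the comment // above says "EBP base".)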
- if (*reg != x86::ESP) { + if (reg != x86::ESP) { for (ScaleFactor scale : scales) { - EXPECT_EQ(x86::Address::displace(x86::Address(*reg, scale, disp0), disp), - x86::Address(*reg, scale, disp0 + disp)); + EXPECT_EQ(x86::Address::displace(x86::Address(reg, scale, disp0), disp), + x86::Address(reg, scale, disp0 + disp)); } } // Test SIB addressing with different base. - for (const x86::Register *index : GetRegisters()) { - if (*index == x86::ESP) { + for (x86::Register index : GetRegisters()) { + if (index == x86::ESP) { continue; // Skip ESP as it cannot be used with this address constructor. } for (ScaleFactor scale : scales) { - EXPECT_EQ(x86::Address::displace(x86::Address(*reg, *index, scale, disp0), disp), - x86::Address(*reg, *index, scale, disp0 + disp)); + EXPECT_EQ(x86::Address::displace(x86::Address(reg, index, scale, disp0), disp), + x86::Address(reg, index, scale, disp0 + disp)); } } diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc index 154e50b4e4..dfdbc183f1 100644 --- a/compiler/utils/x86/jni_macro_assembler_x86.cc +++ b/compiler/utils/x86/jni_macro_assembler_x86.cc @@ -83,7 +83,7 @@ void X86JNIMacroAssembler::BuildFrame(size_t frame_size, void X86JNIMacroAssembler::RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> spill_regs, - bool may_suspend ATTRIBUTE_UNUSED) { + [[maybe_unused]] bool may_suspend) { CHECK_ALIGNED(frame_size, kNativeStackAlignment); cfi().RememberState(); // -kFramePointerSize for ArtMethod*. diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 3fdf05bed9..e70570fb94 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -4418,6 +4418,15 @@ void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) { } +void X86_64Assembler::addw(CpuRegister reg, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + CHECK(imm.is_uint16() || imm.is_int16()) << imm.value(); + EmitUint8(0x66); + EmitOptionalRex32(reg); + EmitComplex(0, Operand(reg), imm, /* is_16_op= */ true); +} + + void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK(imm.is_int32()); // addq only supports 32b immediate. 
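A minimal usage sketch for the new 16-bit register form of addw added above (illustrative only; the `arena` name and the byte comment are assumptions based on the standard x86-64 encoding tables, not part of the patch): x86_64::X86_64Assembler assembler(&arena);  // `arena` is a hypothetical ArenaAllocator. assembler.addw(x86_64::CpuRegister(x86_64::RAX), x86_64::Immediate(0x1234));  // Expected bytes: 66 05 34 12 (0x66 operand-size override, then ADD AX, imm16).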
@@ -4467,6 +4476,15 @@ void X86_64Assembler::addw(const Address& address, const Immediate& imm) { } +void X86_64Assembler::addw(const Address& address, CpuRegister reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOperandSizeOverride(); + EmitOptionalRex32(reg, address); + EmitUint8(0x01); + EmitOperand(reg.LowBits(), address); +} + + void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(dst, src); @@ -5244,6 +5262,12 @@ void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) { EmitOperand(dst.LowBits(), src); } +void X86_64Assembler::rdtsc() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x31); +} + void X86_64Assembler::repne_scasb() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 235ea03e2b..7f80dbccf7 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -852,7 +852,9 @@ class X86_64Assembler final : public Assembler { void addl(CpuRegister reg, const Address& address); void addl(const Address& address, CpuRegister reg); void addl(const Address& address, const Immediate& imm); + void addw(CpuRegister reg, const Immediate& imm); void addw(const Address& address, const Immediate& imm); + void addw(const Address& address, CpuRegister reg); void addq(CpuRegister reg, const Immediate& imm); void addq(CpuRegister dst, CpuRegister src); @@ -964,6 +966,8 @@ class X86_64Assembler final : public Assembler { void popcntq(CpuRegister dst, CpuRegister src); void popcntq(CpuRegister dst, const Address& src); + void rdtsc(); + void rorl(CpuRegister reg, const Immediate& imm); void rorl(CpuRegister operand, CpuRegister shifter); void roll(CpuRegister reg, const Immediate& imm); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index a7c206afaa..9e9c2a5fc9 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -199,24 +199,7 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, addresses_.push_back(x86_64::Address(x86_64::CpuRegister(x86_64::R15), 123456789)); } - if (registers_.size() == 0) { - registers_.push_back(new x86_64::CpuRegister(x86_64::RAX)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RBX)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RCX)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RDX)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RBP)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RSP)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RSI)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RDI)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R8)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R9)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R10)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R11)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R12)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R13)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R14)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R15)); - + if (secondary_register_names_.empty()) { secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "eax"); secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), 
"ebx"); secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "ecx"); @@ -267,42 +250,59 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13b"); quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14b"); quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15b"); - - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM0)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM1)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM2)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM3)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM4)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM5)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM6)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM7)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM8)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM9)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM10)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM11)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM12)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM13)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM14)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM15)); } } void TearDown() override { AssemblerTest::TearDown(); - STLDeleteElements(®isters_); - STLDeleteElements(&fp_registers_); } std::vector<x86_64::Address> GetAddresses() override { return addresses_; } - std::vector<x86_64::CpuRegister*> GetRegisters() override { - return registers_; + ArrayRef<const x86_64::CpuRegister> GetRegisters() override { + static constexpr x86_64::CpuRegister kRegisters[] = { + x86_64::CpuRegister(x86_64::RAX), + x86_64::CpuRegister(x86_64::RBX), + x86_64::CpuRegister(x86_64::RCX), + x86_64::CpuRegister(x86_64::RDX), + x86_64::CpuRegister(x86_64::RBP), + x86_64::CpuRegister(x86_64::RSP), + x86_64::CpuRegister(x86_64::RSI), + x86_64::CpuRegister(x86_64::RDI), + x86_64::CpuRegister(x86_64::R8), + x86_64::CpuRegister(x86_64::R9), + x86_64::CpuRegister(x86_64::R10), + x86_64::CpuRegister(x86_64::R11), + x86_64::CpuRegister(x86_64::R12), + x86_64::CpuRegister(x86_64::R13), + x86_64::CpuRegister(x86_64::R14), + x86_64::CpuRegister(x86_64::R15), + }; + return ArrayRef<const x86_64::CpuRegister>(kRegisters); } - std::vector<x86_64::XmmRegister*> GetFPRegisters() override { - return fp_registers_; + ArrayRef<const x86_64::XmmRegister> GetFPRegisters() override { + static constexpr x86_64::XmmRegister kFPRegisters[] = { + x86_64::XmmRegister(x86_64::XMM0), + x86_64::XmmRegister(x86_64::XMM1), + x86_64::XmmRegister(x86_64::XMM2), + x86_64::XmmRegister(x86_64::XMM3), + x86_64::XmmRegister(x86_64::XMM4), + x86_64::XmmRegister(x86_64::XMM5), + x86_64::XmmRegister(x86_64::XMM6), + x86_64::XmmRegister(x86_64::XMM7), + x86_64::XmmRegister(x86_64::XMM8), + x86_64::XmmRegister(x86_64::XMM9), + x86_64::XmmRegister(x86_64::XMM10), + x86_64::XmmRegister(x86_64::XMM11), + x86_64::XmmRegister(x86_64::XMM12), + x86_64::XmmRegister(x86_64::XMM13), + x86_64::XmmRegister(x86_64::XMM14), + x86_64::XmmRegister(x86_64::XMM15), + }; + return ArrayRef<const x86_64::XmmRegister>(kFPRegisters); } x86_64::Immediate CreateImmediate(int64_t imm_value) override { @@ -328,11 +328,9 @@ class AssemblerX86_64Test : public 
AssemblerTest<x86_64::X86_64Assembler, private: std::vector<x86_64::Address> addresses_; - std::vector<x86_64::CpuRegister*> registers_; std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> secondary_register_names_; std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> tertiary_register_names_; std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> quaternary_register_names_; - std::vector<x86_64::XmmRegister*> fp_registers_; }; class AssemblerX86_64AVXTest : public AssemblerX86_64Test { @@ -515,28 +513,28 @@ TEST_F(AssemblerX86_64Test, Toolchain) { TEST_F(AssemblerX86_64Test, PopqAllAddresses) { // Make sure all addressing modes combinations are tested at least once. std::vector<x86_64::Address> all_addresses; - for (x86_64::CpuRegister* base : GetRegisters()) { + for (const x86_64::CpuRegister& base : GetRegisters()) { // Base only. - all_addresses.push_back(x86_64::Address(*base, -1)); - all_addresses.push_back(x86_64::Address(*base, 0)); - all_addresses.push_back(x86_64::Address(*base, 1)); - all_addresses.push_back(x86_64::Address(*base, 123456789)); - for (x86_64::CpuRegister* index : GetRegisters()) { - if (index->AsRegister() == x86_64::RSP) { + all_addresses.push_back(x86_64::Address(base, -1)); + all_addresses.push_back(x86_64::Address(base, 0)); + all_addresses.push_back(x86_64::Address(base, 1)); + all_addresses.push_back(x86_64::Address(base, 123456789)); + for (const x86_64::CpuRegister& index : GetRegisters()) { + if (index.AsRegister() == x86_64::RSP) { // Index cannot be RSP. continue; - } else if (base->AsRegister() == index->AsRegister()) { + } else if (base.AsRegister() == index.AsRegister()) { // Index only. - all_addresses.push_back(x86_64::Address(*index, TIMES_1, -1)); - all_addresses.push_back(x86_64::Address(*index, TIMES_2, 0)); - all_addresses.push_back(x86_64::Address(*index, TIMES_4, 1)); - all_addresses.push_back(x86_64::Address(*index, TIMES_8, 123456789)); + all_addresses.push_back(x86_64::Address(index, TIMES_1, -1)); + all_addresses.push_back(x86_64::Address(index, TIMES_2, 0)); + all_addresses.push_back(x86_64::Address(index, TIMES_4, 1)); + all_addresses.push_back(x86_64::Address(index, TIMES_8, 123456789)); } // Base and index. 
- all_addresses.push_back(x86_64::Address(*base, *index, TIMES_1, -1)); - all_addresses.push_back(x86_64::Address(*base, *index, TIMES_2, 0)); - all_addresses.push_back(x86_64::Address(*base, *index, TIMES_4, 1)); - all_addresses.push_back(x86_64::Address(*base, *index, TIMES_8, 123456789)); + all_addresses.push_back(x86_64::Address(base, index, TIMES_1, -1)); + all_addresses.push_back(x86_64::Address(base, index, TIMES_2, 0)); + all_addresses.push_back(x86_64::Address(base, index, TIMES_4, 1)); + all_addresses.push_back(x86_64::Address(base, index, TIMES_8, 123456789)); } } DriverStr(RepeatA(&x86_64::X86_64Assembler::popq, all_addresses, "popq {mem}"), "popq"); @@ -587,11 +585,21 @@ TEST_F(AssemblerX86_64Test, AddlImm) { "add ${imm}, %{reg}"), "addli"); } -TEST_F(AssemblerX86_64Test, Addw) { +TEST_F(AssemblerX86_64Test, AddwMem) { DriverStr( RepeatAI(&x86_64::X86_64Assembler::addw, /*imm_bytes*/2U, "addw ${imm}, {mem}"), "addw"); } +TEST_F(AssemblerX86_64Test, AddwImm) { + DriverStr( + RepeatwI(&x86_64::X86_64Assembler::addw, /*imm_bytes*/2U, "addw ${imm}, %{reg}"), "addw"); +} + +TEST_F(AssemblerX86_64Test, AddwMemReg) { + DriverStr( + RepeatAw(&x86_64::X86_64Assembler::addw, "addw %{reg}, {mem}"), "addw"); +} + TEST_F(AssemblerX86_64Test, ImulqReg1) { DriverStr(RepeatR(&x86_64::X86_64Assembler::imulq, "imulq %{reg}"), "imulq"); } @@ -641,11 +649,11 @@ TEST_F(AssemblerX86_64Test, SublImm) { // Shll only allows CL as the shift count. std::string shll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->shll(*reg, shifter); - str << "shll %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->shll(reg, shifter); + str << "shll %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -662,11 +670,11 @@ TEST_F(AssemblerX86_64Test, ShllImm) { // Shlq only allows CL as the shift count. std::string shlq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->shlq(*reg, shifter); - str << "shlq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->shlq(reg, shifter); + str << "shlq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -683,11 +691,11 @@ TEST_F(AssemblerX86_64Test, ShlqImm) { // Shrl only allows CL as the shift count. 
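// (The ISA accepts a variable shift count only in CL, so this helper and the similar ones // below emit one instruction per register by hand instead of using a Repeat* template.)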
std::string shrl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->shrl(*reg, shifter); - str << "shrl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->shrl(reg, shifter); + str << "shrl %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -703,11 +711,11 @@ TEST_F(AssemblerX86_64Test, ShrlImm) { // Shrq only allows CL as the shift count. std::string shrq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->shrq(*reg, shifter); - str << "shrq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->shrq(reg, shifter); + str << "shrq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -723,11 +731,11 @@ TEST_F(AssemblerX86_64Test, ShrqImm) { // Sarl only allows CL as the shift count. std::string sarl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->sarl(*reg, shifter); - str << "sarl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->sarl(reg, shifter); + str << "sarl %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -743,11 +751,11 @@ TEST_F(AssemblerX86_64Test, SarlImm) { // Sarq only allows CL as the shift count. std::string sarq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->sarq(*reg, shifter); - str << "sarq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->sarq(reg, shifter); + str << "sarq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -763,11 +771,11 @@ TEST_F(AssemblerX86_64Test, SarqImm) { // Rorl only allows CL as the shift count. 
std::string rorl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->rorl(*reg, shifter); - str << "rorl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->rorl(reg, shifter); + str << "rorl %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -783,11 +791,11 @@ TEST_F(AssemblerX86_64Test, RorlImm) { // Roll only allows CL as the shift count. std::string roll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->roll(*reg, shifter); - str << "roll %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->roll(reg, shifter); + str << "roll %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -803,11 +811,11 @@ TEST_F(AssemblerX86_64Test, RollImm) { // Rorq only allows CL as the shift count. std::string rorq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->rorq(*reg, shifter); - str << "rorq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->rorq(reg, shifter); + str << "rorq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -823,11 +831,11 @@ TEST_F(AssemblerX86_64Test, RorqImm) { // Rolq only allows CL as the shift count. 
std::string rolq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->rolq(*reg, shifter); - str << "rolq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->rolq(reg, shifter); + str << "rolq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -2135,7 +2143,7 @@ TEST_F(AssemblerX86_64Test, Psrldq) { "psrldq $2, %xmm15\n", "psrldqi"); } -std::string x87_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, +std::string x87_fn([[maybe_unused]] AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; @@ -2202,7 +2210,7 @@ TEST_F(AssemblerX86_64Test, RetImm) { "ret ${imm}", /*non-negative*/ true), "ret"); } -std::string ret_and_leave_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, +std::string ret_and_leave_fn([[maybe_unused]] AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; @@ -2375,13 +2383,13 @@ std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test, std::string suffixes[15] = { "o", "no", "b", "ae", "e", "ne", "be", "a", "s", "ns", "pe", "po", "l", "ge", "le" }; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); std::ostringstream str; - for (auto reg : registers) { + for (auto&& reg : registers) { for (size_t i = 0; i < 15; ++i) { - assembler->setcc(static_cast<x86_64::Condition>(i), *reg); - str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(*reg) << "\n"; + assembler->setcc(static_cast<x86_64::Condition>(i), reg); + str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(reg) << "\n"; } } @@ -2459,27 +2467,27 @@ TEST_F(AssemblerX86_64Test, AddressDisplaceBy) { for (int32_t disp0 : displacements) { // initial displacement for (int32_t disp : displacements) { // extra displacement - for (const x86_64::CpuRegister* reg : GetRegisters()) { + for (const x86_64::CpuRegister reg : GetRegisters()) { // Test non-SIB addressing. - EXPECT_EQ(x86_64::Address::displace(x86_64::Address(*reg, disp0), disp), - x86_64::Address(*reg, disp0 + disp)); + EXPECT_EQ(x86_64::Address::displace(x86_64::Address(reg, disp0), disp), + x86_64::Address(reg, disp0 + disp)); // Test SIB addressing with RBP base. - if (reg->AsRegister() != x86_64::RSP) { + if (reg.AsRegister() != x86_64::RSP) { for (ScaleFactor scale : scales) { - EXPECT_EQ(x86_64::Address::displace(x86_64::Address(*reg, scale, disp0), disp), - x86_64::Address(*reg, scale, disp0 + disp)); + EXPECT_EQ(x86_64::Address::displace(x86_64::Address(reg, scale, disp0), disp), + x86_64::Address(reg, scale, disp0 + disp)); } } // Test SIB addressing with different base. - for (const x86_64::CpuRegister* index : GetRegisters()) { - if (index->AsRegister() == x86_64::RSP) { + for (const x86_64::CpuRegister& index : GetRegisters()) { + if (index.AsRegister() == x86_64::RSP) { continue; // Skip RSP as it cannot be used with this address constructor. 
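// (The SIB byte reserves index value 0b100, RSP's register number, to mean "no index", // so RSP can never be encoded as an index register.)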
} for (ScaleFactor scale : scales) { - EXPECT_EQ(x86_64::Address::displace(x86_64::Address(*reg, *index, scale, disp0), disp), - x86_64::Address(*reg, *index, scale, disp0 + disp)); + EXPECT_EQ(x86_64::Address::displace(x86_64::Address(reg, index, scale, disp0), disp), + x86_64::Address(reg, index, scale, disp0 + disp)); } } @@ -2513,7 +2521,7 @@ static x86_64::X86_64ManagedRegister ManagedFromFpu(x86_64::FloatRegister r) { return x86_64::X86_64ManagedRegister::FromXmmRegister(r); } -std::string buildframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, +std::string buildframe_test_fn([[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test, x86_64::X86_64JNIMacroAssembler* assembler) { // TODO: more interesting spill registers / entry spills. @@ -2556,7 +2564,7 @@ TEST_F(JNIMacroAssemblerX86_64Test, BuildFrame) { DriverFn(&buildframe_test_fn, "BuildFrame"); } -std::string removeframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, +std::string removeframe_test_fn([[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test, x86_64::X86_64JNIMacroAssembler* assembler) { // TODO: more interesting spill registers / entry spills. @@ -2588,7 +2596,7 @@ TEST_F(JNIMacroAssemblerX86_64Test, RemoveFrame) { } std::string increaseframe_test_fn( - JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, + [[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test, x86_64::X86_64JNIMacroAssembler* assembler) { assembler->IncreaseFrameSize(0U); assembler->IncreaseFrameSize(kStackAlignment); @@ -2608,7 +2616,7 @@ TEST_F(JNIMacroAssemblerX86_64Test, IncreaseFrame) { } std::string decreaseframe_test_fn( - JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, + [[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test, x86_64::X86_64JNIMacroAssembler* assembler) { assembler->DecreaseFrameSize(0U); assembler->DecreaseFrameSize(kStackAlignment); diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc index 388845730e..e9e6dbdae7 100644 --- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc @@ -95,7 +95,7 @@ void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size, void X86_64JNIMacroAssembler::RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> spill_regs, - bool may_suspend ATTRIBUTE_UNUSED) { + [[maybe_unused]] bool may_suspend) { CHECK_ALIGNED(frame_size, kNativeStackAlignment); cfi().RememberState(); int gpr_count = 0; @@ -515,7 +515,7 @@ void X86_64JNIMacroAssembler::GetCurrentThread(FrameOffset offset) { } void X86_64JNIMacroAssembler::TryToTransitionFromRunnableToNative( - JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED) { + JNIMacroLabel* label, [[maybe_unused]] ArrayRef<const ManagedRegister> scratch_regs) { constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kX86_64PointerSize>(); |