author     2023-07-03 13:23:28 +0000
committer  2023-07-04 13:15:49 +0000
commit     51c916e7d439a946ea2124f627e902752fde2a79 (patch)
tree       e200230b707f3aaea678cd5f267625629790b9f5
parent     0277a6b9981126261e3d5c1116b6f0f804c68173 (diff)
riscv64: Add `ScratchRegisterScope` to assembler.
This helps check that the scratch registers we use
do not clash.
Test: m test-art-host-gtest
Test: run-gtests.sh
Test: # Edit `run-test` to disable checker, then
testrunner.py --target --64 --ndebug --optimizing
# Ignore 7 pre-existing failures.
Bug: 283082089
Change-Id: I5e1d1bf956cf1bd3c512bbca63855f31e6401969
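
The mechanism introduced by this change is a small RAII scope over a bitmask of available scratch registers: the constructor snapshots the assembler's mask, `AllocateXRegister()` hands out the highest available register and removes it from the mask, and the destructor restores the caller's view, so a nested user can never be handed a register that an enclosing scope is still holding. The sketch below is a stripped-down, self-contained illustration of that pattern, not the real class from `assembler_riscv64.h`; the `Assembler` stand-in, the reduced `XRegister` enum, and `assert` in place of ART's `CHECK` macros are simplifications for the example.

```cpp
// Minimal sketch of the scratch-register-scope pattern (C++20 for <bit>).
#include <bit>
#include <cassert>
#include <cstdint>

// Reduced register enum; T6/T5 match ART's TMP/TMP2 choice on riscv64.
enum XRegister : uint32_t { A0 = 10, T5 = 30, T6 = 31, TMP2 = T5, TMP = T6 };

struct Assembler {
  // Bit i set => register i may currently be used as a scratch register.
  uint32_t available_scratch_core_registers = (1u << TMP) | (1u << TMP2);
};

class ScratchRegisterScope {
 public:
  explicit ScratchRegisterScope(Assembler* assembler)
      : assembler_(assembler),
        old_mask_(assembler->available_scratch_core_registers) {}

  // Restore the enclosing scope's view of which scratch registers are free.
  ~ScratchRegisterScope() {
    assembler_->available_scratch_core_registers = old_mask_;
  }

  // Hand out the highest available scratch register (prefers TMP/T6 over
  // TMP2/T5) and mark it unavailable to nested scopes until this scope ends.
  XRegister AllocateXRegister() {
    uint32_t mask = assembler_->available_scratch_core_registers;
    assert(mask != 0u && "no scratch register available");
    uint32_t reg = 31u - std::countl_zero(mask);
    assembler_->available_scratch_core_registers &= ~(1u << reg);
    return static_cast<XRegister>(reg);
  }

 private:
  Assembler* const assembler_;
  const uint32_t old_mask_;
};

int main() {
  Assembler assembler;

  ScratchRegisterScope outer(&assembler);
  XRegister tmp = outer.AllocateXRegister();  // T6 (highest available)

  XRegister nested_tmp;
  {
    ScratchRegisterScope inner(&assembler);
    nested_tmp = inner.AllocateXRegister();   // T5: T6 is held by `outer`, so no clash
  }                                           // `inner` releases T5 on destruction

  return (tmp == TMP && nested_tmp == TMP2) ? 0 : 1;
}
```

The full class in the diff below additionally tracks FP scratch registers (`FTMP`) and provides `Free*`/`Include*`/`Exclude*` operations so that call sites and tests can widen or narrow the set of usable scratch registers.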
-rw-r--r--  compiler/trampolines/trampoline_compiler.cc                  13
-rw-r--r--  compiler/utils/riscv64/assembler_riscv64.cc                 177
-rw-r--r--  compiler/utils/riscv64/assembler_riscv64.h                  121
-rw-r--r--  compiler/utils/riscv64/assembler_riscv64_test.cc            113
-rw-r--r--  compiler/utils/riscv64/jni_macro_assembler_riscv64.cc        53
-rw-r--r--  compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc   58
6 files changed, 388 insertions, 147 deletions
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc index 23d1a5265c..d9f56629ef 100644 --- a/compiler/trampolines/trampoline_compiler.cc +++ b/compiler/trampolines/trampoline_compiler.cc @@ -129,19 +129,20 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocat EntryPointCallingConvention abi, ThreadOffset64 offset) { Riscv64Assembler assembler(allocator); + ScratchRegisterScope srs(&assembler); + XRegister tmp = srs.AllocateXRegister(); switch (abi) { case kJniAbi: // Load via Thread* held in JNIEnv* in first argument (A0). - // Note: We use `TMP2` here because `TMP` can be used for source address by `Loadd()`. - __ Loadd(TMP2, + __ Loadd(tmp, A0, JNIEnvExt::SelfOffset(static_cast<size_t>(kRiscv64PointerSize)).Int32Value()); - __ Loadd(TMP, TMP2, offset.Int32Value()); - __ Jr(TMP); + __ Loadd(tmp, tmp, offset.Int32Value()); + __ Jr(tmp); break; case kQuickAbi: // TR holds Thread*. - __ Loadd(TMP, TR, offset.Int32Value()); - __ Jr(TMP); + __ Loadd(tmp, TR, offset.Int32Value()); + __ Jr(tmp); break; } diff --git a/compiler/utils/riscv64/assembler_riscv64.cc b/compiler/utils/riscv64/assembler_riscv64.cc index 448e39fa98..00cc98a038 100644 --- a/compiler/utils/riscv64/assembler_riscv64.cc +++ b/compiler/utils/riscv64/assembler_riscv64.cc @@ -941,100 +941,86 @@ void Riscv64Assembler::Csrci(uint32_t csr, uint32_t uimm5) { } void Riscv64Assembler::Loadb(XRegister rd, XRegister rs1, int32_t offset) { - AdjustBaseAndOffset(rs1, offset); - Lb(rd, rs1, offset); + LoadFromOffset<&Riscv64Assembler::Lb>(rd, rs1, offset); } void Riscv64Assembler::Loadh(XRegister rd, XRegister rs1, int32_t offset) { - AdjustBaseAndOffset(rs1, offset); - Lh(rd, rs1, offset); + LoadFromOffset<&Riscv64Assembler::Lh>(rd, rs1, offset); } void Riscv64Assembler::Loadw(XRegister rd, XRegister rs1, int32_t offset) { - AdjustBaseAndOffset(rs1, offset); - Lw(rd, rs1, offset); + LoadFromOffset<&Riscv64Assembler::Lw>(rd, rs1, offset); } void Riscv64Assembler::Loadd(XRegister rd, XRegister rs1, int32_t offset) { - AdjustBaseAndOffset(rs1, offset); - Ld(rd, rs1, offset); + LoadFromOffset<&Riscv64Assembler::Ld>(rd, rs1, offset); } void Riscv64Assembler::Loadbu(XRegister rd, XRegister rs1, int32_t offset) { - AdjustBaseAndOffset(rs1, offset); - Lbu(rd, rs1, offset); + LoadFromOffset<&Riscv64Assembler::Lbu>(rd, rs1, offset); } void Riscv64Assembler::Loadhu(XRegister rd, XRegister rs1, int32_t offset) { - AdjustBaseAndOffset(rs1, offset); - Lhu(rd, rs1, offset); + LoadFromOffset<&Riscv64Assembler::Lhu>(rd, rs1, offset); } void Riscv64Assembler::Loadwu(XRegister rd, XRegister rs1, int32_t offset) { - AdjustBaseAndOffset(rs1, offset); - Lwu(rd, rs1, offset); + LoadFromOffset<&Riscv64Assembler::Lwu>(rd, rs1, offset); } void Riscv64Assembler::Storeb(XRegister rs2, XRegister rs1, int32_t offset) { - CHECK_NE(rs2, TMP); - AdjustBaseAndOffset(rs1, offset); - Sb(rs2, rs1, offset); + StoreToOffset<&Riscv64Assembler::Sb>(rs2, rs1, offset); } void Riscv64Assembler::Storeh(XRegister rs2, XRegister rs1, int32_t offset) { - CHECK_NE(rs2, TMP); - AdjustBaseAndOffset(rs1, offset); - Sh(rs2, rs1, offset); + StoreToOffset<&Riscv64Assembler::Sh>(rs2, rs1, offset); } void Riscv64Assembler::Storew(XRegister rs2, XRegister rs1, int32_t offset) { - CHECK_NE(rs2, TMP); - AdjustBaseAndOffset(rs1, offset); - Sw(rs2, rs1, offset); + StoreToOffset<&Riscv64Assembler::Sw>(rs2, rs1, offset); } void Riscv64Assembler::Stored(XRegister rs2, XRegister rs1, int32_t offset) { - CHECK_NE(rs2, TMP); 
- AdjustBaseAndOffset(rs1, offset); - Sd(rs2, rs1, offset); + StoreToOffset<&Riscv64Assembler::Sd>(rs2, rs1, offset); } void Riscv64Assembler::FLoadw(FRegister rd, XRegister rs1, int32_t offset) { - AdjustBaseAndOffset(rs1, offset); - FLw(rd, rs1, offset); + FLoadFromOffset<&Riscv64Assembler::FLw>(rd, rs1, offset); } void Riscv64Assembler::FLoadd(FRegister rd, XRegister rs1, int32_t offset) { - AdjustBaseAndOffset(rs1, offset); - FLd(rd, rs1, offset); + FLoadFromOffset<&Riscv64Assembler::FLd>(rd, rs1, offset); } void Riscv64Assembler::FStorew(FRegister rs2, XRegister rs1, int32_t offset) { - AdjustBaseAndOffset(rs1, offset); - FSw(rs2, rs1, offset); + FStoreToOffset<&Riscv64Assembler::FSw>(rs2, rs1, offset); } void Riscv64Assembler::FStored(FRegister rs2, XRegister rs1, int32_t offset) { - AdjustBaseAndOffset(rs1, offset); - FSd(rs2, rs1, offset); + FStoreToOffset<&Riscv64Assembler::FSd>(rs2, rs1, offset); } void Riscv64Assembler::LoadConst32(XRegister rd, int32_t value) { - LoadImmediate(rd, value, /*can_use_tmp=*/ false); // No need to use TMP for 32-bit values. + // No need to use a temporary register for 32-bit values. + LoadImmediate(rd, value, /*can_use_tmp=*/ false); } void Riscv64Assembler::LoadConst64(XRegister rd, int64_t value) { - CHECK_NE(rd, TMP); LoadImmediate(rd, value, /*can_use_tmp=*/ true); } template <typename ValueType, typename Addi, typename AddLarge> -void AddConstImpl(XRegister rd, +void AddConstImpl(Riscv64Assembler* assembler, + XRegister rd, XRegister rs1, ValueType value, Addi&& addi, AddLarge&& add_large) { - CHECK_NE(rs1, TMP); + ScratchRegisterScope srs(assembler); + // A temporary must be available for adjustment even if it's not needed. + // However, `rd` can be used as the temporary unless it's the same as `rs1` or SP. 
+ DCHECK_IMPLIES(rd == rs1 || rd == SP, srs.AvailableXRegisters() != 0u); + if (IsInt<12>(value)) { addi(rd, rs1, value); return; @@ -1045,38 +1031,46 @@ void AddConstImpl(XRegister rd, constexpr int32_t kNegativeValueSimpleAdjustment = -0x800; constexpr int32_t kLowestValueForSimpleAdjustment = 2 * kNegativeValueSimpleAdjustment; + if (rd != rs1 && rd != SP) { + srs.IncludeXRegister(rd); + } + XRegister tmp = srs.AllocateXRegister(); if (value >= 0 && value <= kHighestValueForSimpleAdjustment) { - addi(TMP, rs1, kPositiveValueSimpleAdjustment); - addi(rd, TMP, value - kPositiveValueSimpleAdjustment); + addi(tmp, rs1, kPositiveValueSimpleAdjustment); + addi(rd, tmp, value - kPositiveValueSimpleAdjustment); } else if (value < 0 && value >= kLowestValueForSimpleAdjustment) { - addi(TMP, rs1, kNegativeValueSimpleAdjustment); - addi(rd, TMP, value - kNegativeValueSimpleAdjustment); + addi(tmp, rs1, kNegativeValueSimpleAdjustment); + addi(rd, tmp, value - kNegativeValueSimpleAdjustment); } else { - add_large(rd, rs1, value); + add_large(rd, rs1, value, tmp); } } void Riscv64Assembler::AddConst32(XRegister rd, XRegister rs1, int32_t value) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u); auto addiw = [&](XRegister rd, XRegister rs1, int32_t value) { Addiw(rd, rs1, value); }; - auto add_large = [&](XRegister rd, XRegister rs1, int32_t value) { - LoadConst32(TMP, value); - Addw(rd, rs1, TMP); + auto add_large = [&](XRegister rd, XRegister rs1, int32_t value, XRegister tmp) { + LoadConst32(tmp, value); + Addw(rd, rs1, tmp); }; - AddConstImpl(rd, rs1, value, addiw, add_large); + AddConstImpl(this, rd, rs1, value, addiw, add_large); } void Riscv64Assembler::AddConst64(XRegister rd, XRegister rs1, int64_t value) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u); auto addi = [&](XRegister rd, XRegister rs1, int32_t value) { Addi(rd, rs1, value); }; - auto add_large = [&](XRegister rd, XRegister rs1, int64_t value) { - // We cannot load TMP with `LoadConst64()`, so use `Li()`. + auto add_large = [&](XRegister rd, XRegister rs1, int64_t value, XRegister tmp) { + // We may not have another scratch register for `LoadConst64()`, so use `Li()`. // TODO(riscv64): Refactor `LoadImmediate()` so that we can reuse the code to detect - // when the code path using the `TMP` is beneficial, and use that path with a small - // modification - instead of adding the two parts togeter, add them individually - // to the input `rs1`. (This works as long as `rd` is not `TMP`.) - Li(TMP, value); - Add(rd, rs1, TMP); + // when the code path using the scratch reg is beneficial, and use that path with a + // small modification - instead of adding the two parts togeter, add them individually + // to the input `rs1`. (This works as long as `rd` is not the same as `tmp`.) + Li(tmp, value); + Add(rd, rs1, tmp); }; - AddConstImpl(rd, rs1, value, addi, add_large); + AddConstImpl(this, rd, rs1, value, addi, add_large); } void Riscv64Assembler::Beqz(XRegister rs, Riscv64Label* label, bool is_bare) { @@ -1998,8 +1992,11 @@ void Riscv64Assembler::EmitLiterals() { // This method is used to adjust the base register and offset pair for // a load/store when the offset doesn't fit into 12-bit signed integer. -void Riscv64Assembler::AdjustBaseAndOffset(XRegister& base, int32_t& offset) { - CHECK_NE(base, TMP); // The `TMP` is reserved for adjustment even if it's not needed. 
+void Riscv64Assembler::AdjustBaseAndOffset(XRegister& base, + int32_t& offset, + ScratchRegisterScope& srs) { + // A scratch register must be available for adjustment even if it's not needed. + CHECK_NE(srs.AvailableXRegisters(), 0u); if (IsInt<12>(offset)) { return; } @@ -2013,6 +2010,7 @@ void Riscv64Assembler::AdjustBaseAndOffset(XRegister& base, int32_t& offset) { constexpr int32_t kNegativeOffsetSimpleAdjustment = -0x800; constexpr int32_t kLowestOffsetForSimpleAdjustment = 2 * kNegativeOffsetSimpleAdjustment; + XRegister tmp = srs.AllocateXRegister(); if (offset >= 0 && offset <= kHighestOffsetForSimpleAdjustment) { // Make the adjustment 8-byte aligned (0x7f8) except for offsets that cannot be reached // with this adjustment, then try 4-byte alignment, then just half of the offset. @@ -2022,27 +2020,67 @@ void Riscv64Assembler::AdjustBaseAndOffset(XRegister& base, int32_t& offset) { ? kPositiveOffsetSimpleAdjustmentAligned4 : offset / 2; DCHECK(IsInt<12>(adjustment)); - Addi(TMP, base, adjustment); + Addi(tmp, base, adjustment); offset -= adjustment; } else if (offset < 0 && offset >= kLowestOffsetForSimpleAdjustment) { - Addi(TMP, base, kNegativeOffsetSimpleAdjustment); + Addi(tmp, base, kNegativeOffsetSimpleAdjustment); offset -= kNegativeOffsetSimpleAdjustment; } else if (offset >= 0x7ffff800) { // Support even large offsets outside the range supported by `SplitOffset()`. - LoadConst32(TMP, offset); - Add(TMP, TMP, base); + LoadConst32(tmp, offset); + Add(tmp, tmp, base); offset = 0; } else { auto [imm20, short_offset] = SplitOffset(offset); - Lui(TMP, imm20); - Add(TMP, TMP, base); + Lui(tmp, imm20); + Add(tmp, tmp, base); offset = short_offset; } - base = TMP; + base = tmp; +} + +template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> +void Riscv64Assembler::LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + // If `rd` differs from `rs1`, allow using it as a temporary if needed. 
+ if (rd != rs1) { + srs.IncludeXRegister(rd); + } + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rd, rs1, offset); +} + +template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> +void Riscv64Assembler::StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + CHECK_EQ((1u << rs2) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rs2, rs1, offset); +} + +template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> +void Riscv64Assembler::FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rd, rs1, offset); +} + +template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> +void Riscv64Assembler::FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rs2, rs1, offset); } void Riscv64Assembler::LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp) { - DCHECK_IMPLIES(can_use_tmp, rd != TMP); + CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + CHECK_IMPLIES(can_use_tmp, srs.AvailableXRegisters() != 0u); // Helper lambdas. auto addi = [&](XRegister rd, XRegister rs, int32_t imm) { Addi(rd, rs, imm); }; @@ -2210,8 +2248,8 @@ void Riscv64Assembler::LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp } } - // If we can use `TMP`, try using it to emit a shorter sequence. - // Without `TMP`, the sequence is up to 8 instructions, with `TMP` only up to 6. + // If we can use a scratch register, try using it to emit a shorter sequence. Without a + // scratch reg, the sequence is up to 8 instructions, with a scratch reg only up to 6. if (can_use_tmp) { int64_t low = (imm & 0xffffffff) - ((imm & 0x80000000) << 1); int64_t remainder = imm - low; @@ -2224,6 +2262,7 @@ void Riscv64Assembler::LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp /*SLLI+ADD*/ 2u; if (new_insns_needed < insns_needed) { DCHECK_NE(low & 0xfffff000, 0); + XRegister tmp = srs.AllocateXRegister(); if (IsInt<20>(high) && !IsInt<12>(high)) { // Emit the signed 20-bit value with LUI and reduce the SLLI shamt by 12 to compensate. 
Lui(rd, static_cast<uint32_t>(high & 0xfffff)); @@ -2231,9 +2270,9 @@ void Riscv64Assembler::LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp } else { emit_simple_li(rd, high); } - emit_simple_li(TMP, low); + emit_simple_li(tmp, low); Slli(rd, rd, slli_shamt); - Add(rd, rd, TMP); + Add(rd, rd, tmp); return; } } diff --git a/compiler/utils/riscv64/assembler_riscv64.h b/compiler/utils/riscv64/assembler_riscv64.h index f7c37284eb..96b5a11775 100644 --- a/compiler/utils/riscv64/assembler_riscv64.h +++ b/compiler/utils/riscv64/assembler_riscv64.h @@ -34,6 +34,8 @@ namespace art HIDDEN { namespace riscv64 { +class ScratchRegisterScope; + enum class FPRoundingMode : uint32_t { kRNE = 0x0, // Round to Nearest, ties to Even kRTZ = 0x1, // Round towards Zero @@ -146,7 +148,9 @@ class Riscv64Assembler final : public Assembler { jump_tables_(allocator->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0), last_old_position_(0), - last_branch_id_(0) { + last_branch_id_(0), + available_scratch_core_registers_((1u << TMP) | (1u << TMP2)), + available_scratch_fp_registers_(1u << FTMP) { UNUSED(instruction_set_features); cfi().DelayEmittingAdvancePCs(); } @@ -796,7 +800,17 @@ class Riscv64Assembler final : public Assembler { void Emit(uint32_t value); // Adjust base register and offset if needed for load/store with a large offset. - void AdjustBaseAndOffset(XRegister& base, int32_t& offset); + void AdjustBaseAndOffset(XRegister& base, int32_t& offset, ScratchRegisterScope& srs); + + // Helper templates for loads/stores with 32-bit offsets. + template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> + void LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset); + template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> + void StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset); + template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> + void FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset); + template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> + void FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset); // Implementation helper for `Li()`, `LoadConst32()` and `LoadConst64()`. void LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp); @@ -989,11 +1003,114 @@ class Riscv64Assembler final : public Assembler { uint32_t last_old_position_; uint32_t last_branch_id_; + uint32_t available_scratch_core_registers_; + uint32_t available_scratch_fp_registers_; + static constexpr uint32_t kXlen = 64; + friend class ScratchRegisterScope; + DISALLOW_COPY_AND_ASSIGN(Riscv64Assembler); }; +class ScratchRegisterScope { + public: + explicit ScratchRegisterScope(Riscv64Assembler* assembler) + : assembler_(assembler), + old_available_scratch_core_registers_(assembler->available_scratch_core_registers_), + old_available_scratch_fp_registers_(assembler->available_scratch_fp_registers_) {} + + ~ScratchRegisterScope() { + assembler_->available_scratch_core_registers_ = old_available_scratch_core_registers_; + assembler_->available_scratch_fp_registers_ = old_available_scratch_fp_registers_; + } + + // Alocate a scratch `XRegister`. There must be an available register to allocate. + XRegister AllocateXRegister() { + CHECK_NE(assembler_->available_scratch_core_registers_, 0u); + // Allocate the highest available scratch register (prefer TMP(T6) over TMP2(T5)). 
+ uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_core_registers_) - 1u) - + CLZ(assembler_->available_scratch_core_registers_); + assembler_->available_scratch_core_registers_ &= ~(1u << reg_num); + DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters)); + return enum_cast<XRegister>(reg_num); + } + + // Free a previously unavailable core register for use as a scratch register. + // This can be an arbitrary register, not necessarly the usual `TMP` or `TMP2`. + void FreeXRegister(XRegister reg) { + uint32_t reg_num = enum_cast<uint32_t>(reg); + DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters)); + CHECK_EQ((1u << reg_num) & assembler_->available_scratch_core_registers_, 0u); + assembler_->available_scratch_core_registers_ |= 1u << reg_num; + } + + // The number of available scratch core registers. + size_t AvailableXRegisters() { + return POPCOUNT(assembler_->available_scratch_core_registers_); + } + + // Make sure a core register is available for use as a scratch register. + void IncludeXRegister(XRegister reg) { + uint32_t reg_num = enum_cast<uint32_t>(reg); + DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters)); + assembler_->available_scratch_core_registers_ |= 1u << reg_num; + } + + // Make sure a core register is not available for use as a scratch register. + void ExcludeXRegister(XRegister reg) { + uint32_t reg_num = enum_cast<uint32_t>(reg); + DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters)); + assembler_->available_scratch_core_registers_ &= ~(1u << reg_num); + } + + // Alocate a scratch `FRegister`. There must be an available register to allocate. + FRegister AllocateFRegister() { + CHECK_NE(assembler_->available_scratch_fp_registers_, 0u); + // Allocate the highest available scratch register (same as for core registers). + uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_fp_registers_) - 1u) - + CLZ(assembler_->available_scratch_fp_registers_); + assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num); + DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters)); + return enum_cast<FRegister>(reg_num); + } + + // Free a previously unavailable FP register for use as a scratch register. + // This can be an arbitrary register, not necessarly the usual `FTMP`. + void FreeFRegister(FRegister reg) { + uint32_t reg_num = enum_cast<uint32_t>(reg); + DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters)); + CHECK_EQ((1u << reg_num) & assembler_->available_scratch_fp_registers_, 0u); + assembler_->available_scratch_fp_registers_ |= 1u << reg_num; + } + + // The number of available scratch FP registers. + size_t AvailableFRegisters() { + return POPCOUNT(assembler_->available_scratch_fp_registers_); + } + + // Make sure an FP register is available for use as a scratch register. + void IncludeFRegister(FRegister reg) { + uint32_t reg_num = enum_cast<uint32_t>(reg); + DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters)); + assembler_->available_scratch_fp_registers_ |= 1u << reg_num; + } + + // Make sure an FP register is not available for use as a scratch register. 
+ void ExcludeFRegister(FRegister reg) { + uint32_t reg_num = enum_cast<uint32_t>(reg); + DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters)); + assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num); + } + + private: + Riscv64Assembler* const assembler_; + const uint32_t old_available_scratch_core_registers_; + const uint32_t old_available_scratch_fp_registers_; + + DISALLOW_COPY_AND_ASSIGN(ScratchRegisterScope); +}; + } // namespace riscv64 } // namespace art diff --git a/compiler/utils/riscv64/assembler_riscv64_test.cc b/compiler/utils/riscv64/assembler_riscv64_test.cc index 15026093b8..707e2cffb4 100644 --- a/compiler/utils/riscv64/assembler_riscv64_test.cc +++ b/compiler/utils/riscv64/assembler_riscv64_test.cc @@ -214,9 +214,8 @@ class AssemblerRISCV64Test : public AssemblerTest<riscv64::Riscv64Assembler, // Test various registers with a few small values. // (Even Zero is an accepted register even if that does not really load the requested value.) for (XRegister* reg : GetRegisters()) { - if (can_use_tmp && *reg == TMP) { - continue; // Not a valid target register. - } + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(*reg); std::string rd = GetRegisterName(*reg); emit_load_const(*reg, -1); expected += "li " + rd + ", -1\n"; @@ -648,20 +647,19 @@ class AssemblerRISCV64Test : public AssemblerTest<riscv64::Riscv64Assembler, large_values.erase(kept_end, large_values.end()); large_values.push_back(0xfff); - std::string tmp_name = GetRegisterName(TMP); - std::string addi_tmp = "addi" + suffix + " " + tmp_name + ", "; - std::string expected; for (XRegister* rd : GetRegisters()) { std::string rd_name = GetRegisterName(*rd); std::string addi_rd = "addi" + suffix + " " + rd_name + ", "; std::string add_rd = "add" + suffix + " " + rd_name + ", "; for (XRegister* rs1 : GetRegisters()) { - // TMP can be the destination register but not the source register. - if (*rs1 == TMP) { - continue; - } + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(*rs1); + srs.ExcludeXRegister(*rd); + std::string rs1_name = GetRegisterName(*rs1); + std::string tmp_name = GetRegisterName((*rs1 != TMP) ? TMP : TMP2); + std::string addi_tmp = "addi" + suffix + " " + tmp_name + ", "; for (int64_t imm : kImm12s) { emit_op(*rd, *rs1, imm); @@ -688,8 +686,10 @@ class AssemblerRISCV64Test : public AssemblerTest<riscv64::Riscv64Assembler, DriverStr(expected, test_name); } - template <typename EmitOp> - std::string RepeatLoadStoreArbitraryOffset(const std::string& head, EmitOp&& emit_op) { + template <typename GetTemp, typename EmitOp> + std::string RepeatLoadStoreArbitraryOffset(const std::string& head, + GetTemp&& get_temp, + EmitOp&& emit_op) { int64_t kImm12s[] = { 0, 1, 2, 0xff, 0x100, 0x1ff, 0x200, 0x3ff, 0x400, 0x7ff, -1, -2, -0x100, -0x101, -0x200, -0x201, -0x400, -0x401, -0x800, @@ -719,12 +719,15 @@ class AssemblerRISCV64Test : public AssemblerTest<riscv64::Riscv64Assembler, 0x7ffff800, 0x7ffff801, 0x7ffffffe, 0x7fffffff }; - std::string tmp_name = GetRegisterName(TMP); std::string expected; for (XRegister* rs1 : GetRegisters()) { - if (*rs1 == TMP) { - continue; // TMP cannot be the address base register. + XRegister tmp = get_temp(*rs1); + if (tmp == kNoXRegister) { + continue; // Unsupported register combination. 
} + std::string tmp_name = GetRegisterName(tmp); + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(*rs1); std::string rs1_name = GetRegisterName(*rs1); for (int64_t imm : kImm12s) { @@ -773,13 +776,20 @@ class AssemblerRISCV64Test : public AssemblerTest<riscv64::Riscv64Assembler, bool is_store) { std::string expected; for (XRegister* rd : GetRegisters()) { - // TMP can be the target register for loads but not for stores where loading the - // adjusted address to TMP would clobber the value we want to store. - if (is_store && *rd == TMP) { - continue; - } + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(*rd); + auto get_temp = [&](XRegister rs1) { + if (is_store) { + return (rs1 != TMP && *rd != TMP) + ? TMP + : (rs1 != TMP2 && *rd != TMP2) ? TMP2 : kNoXRegister; + } else { + return rs1 != TMP ? TMP : TMP2; + } + }; expected += RepeatLoadStoreArbitraryOffset( insn + " " + GetRegisterName(*rd), + get_temp, [&](XRegister rs1, int64_t offset) { (GetAssembler()->*fn)(*rd, rs1, offset); }); } DriverStr(expected, test_name); @@ -792,6 +802,7 @@ class AssemblerRISCV64Test : public AssemblerTest<riscv64::Riscv64Assembler, for (FRegister* rd : GetFPRegisters()) { expected += RepeatLoadStoreArbitraryOffset( insn + " " + GetFPRegName(*rd), + [&](XRegister rs1) { return rs1 != TMP ? TMP : TMP2; }, [&](XRegister rs1, int64_t offset) { (GetAssembler()->*fn)(*rd, rs1, offset); }); } DriverStr(expected, test_name); @@ -2013,6 +2024,9 @@ TEST_F(AssemblerRISCV64Test, Csrci) { TEST_F(AssemblerRISCV64Test, LoadConst32) { // `LoadConst32()` emits the same code sequences as `Li()` for 32-bit values. + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(TMP); + srs.ExcludeXRegister(TMP2); DriverStr(RepeatRIb(&Riscv64Assembler::LoadConst32, -32, "li {reg}, {imm}"), "LoadConst32"); } @@ -2398,6 +2412,65 @@ TEST_F(AssemblerRISCV64Test, JumpTable) { DriverStr(expected, "JumpTable"); } +TEST_F(AssemblerRISCV64Test, ScratchRegisters) { + ScratchRegisterScope srs(GetAssembler()); + ASSERT_EQ(2u, srs.AvailableXRegisters()); // Default: TMP(T6) and TMP2(T5). + ASSERT_EQ(1u, srs.AvailableFRegisters()); // Default: FTMP(FT11). + + XRegister tmp = srs.AllocateXRegister(); + EXPECT_EQ(TMP, tmp); + XRegister tmp2 = srs.AllocateXRegister(); + EXPECT_EQ(TMP2, tmp2); + ASSERT_EQ(0u, srs.AvailableXRegisters()); + + FRegister ftmp = srs.AllocateFRegister(); + EXPECT_EQ(FTMP, ftmp); + ASSERT_EQ(0u, srs.AvailableFRegisters()); + + // Test nesting. + srs.FreeXRegister(A0); + srs.FreeXRegister(A1); + srs.FreeFRegister(FA0); + srs.FreeFRegister(FA1); + ASSERT_EQ(2u, srs.AvailableXRegisters()); + ASSERT_EQ(2u, srs.AvailableFRegisters()); + { + ScratchRegisterScope srs2(GetAssembler()); + ASSERT_EQ(2u, srs2.AvailableXRegisters()); + ASSERT_EQ(2u, srs2.AvailableFRegisters()); + XRegister a1 = srs2.AllocateXRegister(); + EXPECT_EQ(A1, a1); + XRegister a0 = srs2.AllocateXRegister(); + EXPECT_EQ(A0, a0); + ASSERT_EQ(0u, srs2.AvailableXRegisters()); + FRegister fa1 = srs2.AllocateFRegister(); + EXPECT_EQ(FA1, fa1); + FRegister fa0 = srs2.AllocateFRegister(); + EXPECT_EQ(FA0, fa0); + ASSERT_EQ(0u, srs2.AvailableFRegisters()); + } + ASSERT_EQ(2u, srs.AvailableXRegisters()); + ASSERT_EQ(2u, srs.AvailableFRegisters()); + + srs.IncludeXRegister(A0); // No-op as the register was already available. + ASSERT_EQ(2u, srs.AvailableXRegisters()); + srs.IncludeFRegister(FA0); // No-op as the register was already available. 
+ ASSERT_EQ(2u, srs.AvailableFRegisters()); + srs.IncludeXRegister(S0); + ASSERT_EQ(3u, srs.AvailableXRegisters()); + srs.IncludeFRegister(FS0); + ASSERT_EQ(3u, srs.AvailableFRegisters()); + + srs.ExcludeXRegister(S1); // No-op as the register was not available. + ASSERT_EQ(3u, srs.AvailableXRegisters()); + srs.ExcludeFRegister(FS1); // No-op as the register was not available. + ASSERT_EQ(3u, srs.AvailableFRegisters()); + srs.ExcludeXRegister(A0); + ASSERT_EQ(2u, srs.AvailableXRegisters()); + srs.ExcludeFRegister(FA0); + ASSERT_EQ(2u, srs.AvailableFRegisters()); +} + #undef __ } // namespace riscv64 diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc b/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc index d7e8030fa0..4a5c8099c9 100644 --- a/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc +++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc @@ -188,10 +188,11 @@ void Riscv64JNIMacroAssembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_s void Riscv64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 offs, bool tag_sp) { XRegister src = SP; + ScratchRegisterScope srs(&asm_); if (tag_sp) { - // Note: We use `TMP2` here because `TMP` can be used by `Stored()`. - __ Ori(TMP2, SP, 0x2); - src = TMP2; + XRegister tmp = srs.AllocateXRegister(); + __ Ori(tmp, SP, 0x2); + src = tmp; } __ Stored(src, TR, offs.Int32Value()); } @@ -294,10 +295,10 @@ void Riscv64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, dest_regs |= get_mask(dest.GetRegister()); } } else { - // Note: We use `TMP2` here because `TMP` can be used by `Store()`. + ScratchRegisterScope srs(&asm_); Riscv64ManagedRegister reg = src.IsRegister() ? src.GetRegister().AsRiscv64() - : Riscv64ManagedRegister::FromXRegister(TMP2); + : Riscv64ManagedRegister::FromXRegister(srs.AllocateXRegister()); if (!src.IsRegister()) { if (ref != kInvalidReferenceOffset) { // We're loading the reference only for comparison with null, so it does not matter @@ -422,8 +423,10 @@ void Riscv64JNIMacroAssembler::VerifyObject([[maybe_unused]] FrameOffset src, void Riscv64JNIMacroAssembler::Jump(ManagedRegister m_base, Offset offs) { Riscv64ManagedRegister base = m_base.AsRiscv64(); CHECK(base.IsXRegister()) << base; - __ Loadd(TMP, base.AsXRegister(), offs.Int32Value()); - __ Jr(TMP); + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadd(tmp, base.AsXRegister(), offs.Int32Value()); + __ Jr(tmp); } void Riscv64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs) { @@ -505,15 +508,19 @@ void Riscv64JNIMacroAssembler::TryToTransitionFromNativeToRunnable( } void Riscv64JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) { - __ Loadw(TMP, TR, Thread::ThreadFlagsOffset<kRiscv64PointerSize>().Int32Value()); + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, TR, Thread::ThreadFlagsOffset<kRiscv64PointerSize>().Int32Value()); DCHECK(IsInt<12>(dchecked_integral_cast<int32_t>(Thread::SuspendOrCheckpointRequestFlags()))); - __ Andi(TMP, TMP, dchecked_integral_cast<int32_t>(Thread::SuspendOrCheckpointRequestFlags())); - __ Bnez(TMP, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + __ Andi(tmp, tmp, dchecked_integral_cast<int32_t>(Thread::SuspendOrCheckpointRequestFlags())); + __ Bnez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); } void Riscv64JNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) { - __ Loadd(TMP, TR, Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value()); - __ Bnez(TMP, 
Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadd(tmp, TR, Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value()); + __ Bnez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); } void Riscv64JNIMacroAssembler::DeliverPendingException() { @@ -541,7 +548,8 @@ void Riscv64JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnary DCHECK_EQ(Thread::IsGcMarkingSize(), 4u); - XRegister test_reg = TMP; + ScratchRegisterScope srs(&asm_); + XRegister test_reg = srs.AllocateXRegister(); int32_t is_gc_marking_offset = Thread::IsGcMarkingOffset<kRiscv64PointerSize>().Int32Value(); __ Loadw(test_reg, TR, is_gc_marking_offset); switch (cond) { @@ -561,17 +569,19 @@ void Riscv64JNIMacroAssembler::TestMarkBit(ManagedRegister m_ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) { XRegister ref = m_ref.AsRiscv64().AsXRegister(); - __ Loadw(TMP, ref, mirror::Object::MonitorOffset().Int32Value()); + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, ref, mirror::Object::MonitorOffset().Int32Value()); // Move the bit we want to check to the sign bit, so that we can use BGEZ/BLTZ // to check it. Extracting the bit for BEQZ/BNEZ would require one more instruction. static_assert(LockWord::kMarkBitStateSize == 1u); - __ Slliw(TMP, TMP, 31 - LockWord::kMarkBitStateShift); + __ Slliw(tmp, tmp, 31 - LockWord::kMarkBitStateShift); switch (cond) { case JNIMacroUnaryCondition::kZero: - __ Bgez(TMP, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + __ Bgez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); break; case JNIMacroUnaryCondition::kNotZero: - __ Bltz(TMP, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + __ Bltz(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); break; default: LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond); @@ -583,10 +593,11 @@ void Riscv64JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMa int32_t small_offset = dchecked_integral_cast<int32_t>(address & 0xfff) - dchecked_integral_cast<int32_t>((address & 0x800) << 1); int64_t remainder = static_cast<int64_t>(address) - small_offset; - // Note: We use `TMP2` here because `TMP` can be used by `LoadConst64()`. 
- __ LoadConst64(TMP2, remainder); - __ Lb(TMP2, TMP2, small_offset); - __ Bnez(TMP2, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ LoadConst64(tmp, remainder); + __ Lb(tmp, tmp, small_offset); + __ Bnez(tmp, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); } void Riscv64JNIMacroAssembler::Bind(JNIMacroLabel* label) { diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc b/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc index ec5bb9d818..004ba9bb7f 100644 --- a/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc +++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc @@ -230,9 +230,9 @@ TEST_F(JniMacroAssemblerRiscv64Test, Store) { __ StoreStackPointerToThread(ThreadOffset64(512), /*tag_sp=*/ false); expected += "sd sp, 512(s1)\n"; __ StoreStackPointerToThread(ThreadOffset64(3 * KB), /*tag_sp=*/ true); - expected += "ori t5, sp, 0x2\n" - "addi t6, s1, 0x7f8\n" - "sd t5, 0x408(t6)\n"; + expected += "ori t6, sp, 0x2\n" + "addi t5, s1, 0x7f8\n" + "sd t6, 0x408(t5)\n"; DriverStr(expected, "Store"); } @@ -351,10 +351,10 @@ TEST_F(JniMacroAssemblerRiscv64Test, MoveArguments) { "addi a7, sp, 72\n" "1:\n" "sd a7, 0(sp)\n" - "ld t5, 76(sp)\n" - "sd t5, 8(sp)\n" - "lw t5, 84(sp)\n" - "sd t5, 16(sp)\n" + "ld t6, 76(sp)\n" + "sd t6, 8(sp)\n" + "lw t6, 84(sp)\n" + "sd t6, 16(sp)\n" "mv a7, a6\n" "mv a6, a5\n" "mv a5, a4\n" @@ -432,15 +432,15 @@ TEST_F(JniMacroAssemblerRiscv64Test, MoveArguments) { "beqz a2, 1f\n" "addi a2, sp, 44\n" "1:\n" - "ld t5, 76(sp)\n" - "sd t5, 0(sp)\n" - "lwu t5, 84(sp)\n" - "beqz t5, 2f\n" - "addi t5, sp, 84\n" + "ld t6, 76(sp)\n" + "sd t6, 0(sp)\n" + "lwu t6, 84(sp)\n" + "beqz t6, 2f\n" + "addi t6, sp, 84\n" "2:\n" - "sd t5, 8(sp)\n" - "lw t5, 88(sp)\n" - "sd t5, 16(sp)\n"; + "sd t6, 8(sp)\n" + "lw t6, 88(sp)\n" + "sd t6, 16(sp)\n"; // Normal or @FastNative static with parameters "FDFDFDFDFDIJIJIJL". ArgumentLocation move_dests3[] = { @@ -634,10 +634,10 @@ TEST_F(JniMacroAssemblerRiscv64Test, MoveArguments) { __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests5), ArrayRef<ArgumentLocation>(move_srcs5), ArrayRef<FrameOffset>(move_refs5)); - expected += "ld t5, 88(sp)\n" - "sd t5, 0(sp)\n" - "lw t5, 96(sp)\n" - "sd t5, 8(sp)\n" + expected += "ld t6, 88(sp)\n" + "sd t6, 0(sp)\n" + "lw t6, 96(sp)\n" + "sd t6, 8(sp)\n" "mv a0, a1\n" "mv a1, a2\n" "mv a2, a3\n" @@ -927,15 +927,15 @@ TEST_F(JniMacroAssemblerRiscv64Test, TestByteAndJumpIfNotZero) { std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); __ TestByteAndJumpIfNotZero(0x12345678u, slow_path.get()); - expected += "lui t5, 0x12345\n" - "lb t5, 0x678(t5)\n" - "bnez t5, 2f\n"; + expected += "lui t6, 0x12345\n" + "lb t6, 0x678(t6)\n" + "bnez t6, 2f\n"; __ TestByteAndJumpIfNotZero(0x87654321u, slow_path.get()); - expected += "lui t5, 0x87654/4\n" - "slli t5, t5, 2\n" - "lb t5, 0x321(t5)\n" - "bnez t5, 2f\n"; + expected += "lui t6, 0x87654/4\n" + "slli t6, t6, 2\n" + "lb t6, 0x321(t6)\n" + "bnez t6, 2f\n"; __ Bind(resume.get()); expected += "1:\n"; @@ -946,9 +946,9 @@ TEST_F(JniMacroAssemblerRiscv64Test, TestByteAndJumpIfNotZero) { expected += "2:\n"; __ TestByteAndJumpIfNotZero(0x456789abu, resume.get()); - expected += "lui t5, 0x45678+1\n" - "lb t5, 0x9ab-0x1000(t5)\n" - "bnez t5, 1b\n"; + expected += "lui t6, 0x45678+1\n" + "lb t6, 0x9ab-0x1000(t6)\n" + "bnez t6, 1b\n"; DriverStr(expected, "TestByteAndJumpIfNotZero"); } |