Diffstat (limited to 'compiler/utils')
20 files changed, 3007 insertions, 407 deletions
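A C++ detail worth noting before the first hunk: in the assembler_arm_vixl.h change below, the new Vmov(DRegister, double) overload would hide every inherited MacroAssembler::Vmov overload if it weren't for the trailing using-declaration. A minimal standalone sketch of that name-hiding rule (the types and names here are illustrative, not the VIXL API):

#include <iostream>

struct MacroAsm {
  void Vmov(int reg) { std::cout << "core-register form\n"; }
  void Vmov(double imm) { std::cout << "base FP form\n"; }
};

struct DerivedAsm : MacroAsm {
  // Redeclaring one overload hides *all* inherited Vmov overloads...
  void Vmov(double imm) { std::cout << "derived FP form (literal fallback)\n"; }
  using MacroAsm::Vmov;  // ...so re-expose them, as the patch does.
};

int main() {
  DerivedAsm a;
  a.Vmov(1.0);  // derived FP form
  a.Vmov(5);    // core-register form; without the using-declaration this would
                // silently convert 5 to double and call the derived overload
}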
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc index e5eef37b7b..6afc3ddecb 100644 --- a/compiler/utils/arm/assembler_arm_vixl.cc +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -230,6 +230,7 @@ void ArmVIXLAssembler::StoreToOffset(StoreOperandType type, if (!CanHoldStoreOffsetThumb(type, offset)) { CHECK_NE(base.GetCode(), kIpCode); if ((reg.GetCode() != kIpCode) && + (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) && ((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) { tmp_reg = temps.Acquire(); } else { diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index 322f6c4d70..e81e767575 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -135,6 +135,16 @@ class ArmVIXLMacroAssembler FINAL : public vixl32::MacroAssembler { // jumping within a 2KB range. For B(cond, label), because the supported branch range is only 256 // bytes, we use the far_target hint to try to use the 16-bit T1 encoding for short-range jumps. void B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target = true); + + // Use a literal for generating a double constant if it doesn't fit the VMOV immediate encoding. + void Vmov(vixl32::DRegister rd, double imm) { + if (vixl::VFP::IsImmFP64(imm)) { + MacroAssembler::Vmov(rd, imm); + } else { + MacroAssembler::Vldr(rd, imm); + } + } + using MacroAssembler::Vmov; }; class ArmVIXLAssembler FINAL : public Assembler { diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 5c4875951b..f655994bd3 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -42,7 +42,10 @@ enum class RegisterView { // private kUseQuaternaryName, }; -template<typename Ass, typename Reg, typename FPReg, typename Imm> +// Used as the default vector-register type in the template below, for architectures without vector registers.
+struct NoVectorRegs {}; + +template<typename Ass, typename Reg, typename FPReg, typename Imm, typename VecReg = NoVectorRegs> class AssemblerTest : public testing::Test { public: Ass* GetAssembler() { @@ -146,7 +149,8 @@ class AssemblerTest : public testing::Test { std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), const std::string& fmt, - int bias = 0) { + int bias = 0, + int multiplier = 1) { std::string str; std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); @@ -154,7 +158,7 @@ class AssemblerTest : public testing::Test { for (auto reg2 : reg2_registers) { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); - (assembler_.get()->*f)(*reg1, *reg2, new_imm + bias); + (assembler_.get()->*f)(*reg1, *reg2, new_imm * multiplier + bias); std::string base = fmt; std::string reg1_string = (this->*GetName1)(*reg1); @@ -172,7 +176,7 @@ class AssemblerTest : public testing::Test { size_t imm_index = base.find(IMM_TOKEN); if (imm_index != std::string::npos) { std::ostringstream sreg; - sreg << imm + bias; + sreg << imm * multiplier + bias; std::string imm_string = sreg.str(); base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); } @@ -305,7 +309,7 @@ class AssemblerTest : public testing::Test { template <typename RegType, typename ImmType> std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType), int imm_bits, - const std::vector<Reg*> registers, + const std::vector<RegType*> registers, std::string (AssemblerTest::*GetName)(const RegType&), const std::string& fmt, int bias) { @@ -538,6 +542,82 @@ class AssemblerTest : public testing::Test { return str; } + std::string RepeatVV(void (Ass::*f)(VecReg, VecReg), const std::string& fmt) { + return RepeatTemplatedRegisters<VecReg, VecReg>(f, + GetVectorRegisters(), + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + fmt); + } + + std::string RepeatVVV(void (Ass::*f)(VecReg, VecReg, VecReg), const std::string& fmt) { + return RepeatTemplatedRegisters<VecReg, VecReg, VecReg>(f, + GetVectorRegisters(), + GetVectorRegisters(), + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + fmt); + } + + std::string RepeatVR(void (Ass::*f)(VecReg, Reg), const std::string& fmt) { + return RepeatTemplatedRegisters<VecReg, Reg>( + f, + GetVectorRegisters(), + GetRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt); + } + + template <typename ImmType> + std::string RepeatVIb(void (Ass::*f)(VecReg, ImmType), + int imm_bits, + std::string fmt, + int bias = 0) { + return RepeatTemplatedRegisterImmBits<VecReg, ImmType>(f, + imm_bits, + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + fmt, + bias); + } + + template <typename ImmType> + std::string RepeatVRIb(void (Ass::*f)(VecReg, Reg, ImmType), + int imm_bits, + const std::string& fmt, + int bias = 0, + int multiplier = 1) { + return RepeatTemplatedRegistersImmBits<VecReg, Reg, ImmType>( + f, + imm_bits, + GetVectorRegisters(), + GetRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt, + bias, + multiplier); + } + + template <typename ImmType> + std::string RepeatVVIb(void (Ass::*f)(VecReg, VecReg, ImmType), + int imm_bits, + const std::string& fmt, + int bias = 0) { + return RepeatTemplatedRegistersImmBits<VecReg, VecReg, ImmType>(f, + imm_bits, + 
GetVectorRegisters(), + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + fmt, + bias); + } + // This is intended to be run as a test. bool CheckTools() { return test_helper_->CheckTools(); @@ -552,6 +632,11 @@ class AssemblerTest : public testing::Test { UNREACHABLE(); } + virtual std::vector<VecReg*> GetVectorRegisters() { + UNIMPLEMENTED(FATAL) << "Architecture does not support vector registers"; + UNREACHABLE(); + } + // Secondary register names are the secondary view on registers, e.g., 32b on 64b systems. virtual std::string GetSecondaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { UNIMPLEMENTED(FATAL) << "Architecture does not support secondary registers"; @@ -971,6 +1056,12 @@ class AssemblerTest : public testing::Test { return sreg.str(); } + std::string GetVecRegName(const VecReg& reg) { + std::ostringstream sreg; + sreg << reg; + return sreg.str(); + } + // If the assembly file needs a header, return it in a sub-class. virtual const char* GetAssemblyHeader() { return nullptr; diff --git a/compiler/utils/atomic_method_ref_map-inl.h b/compiler/utils/atomic_method_ref_map-inl.h index d71c2fe997..ad3a099eb6 100644 --- a/compiler/utils/atomic_method_ref_map-inl.h +++ b/compiler/utils/atomic_method_ref_map-inl.h @@ -42,7 +42,7 @@ template <typename T> inline bool AtomicMethodRefMap<T>::Get(MethodReference ref, T* out) const { const ElementArray* const array = GetArray(ref.dex_file); if (array == nullptr) { - return kInsertResultInvalidDexFile; + return false; } *out = (*array)[ref.dex_method_index].LoadRelaxed(); return true; diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc index 2f154fb862..3ac6c3ca7a 100644 --- a/compiler/utils/jni_macro_assembler.cc +++ b/compiler/utils/jni_macro_assembler.cc @@ -84,7 +84,11 @@ template <> MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create( ArenaAllocator* arena, InstructionSet instruction_set, - const InstructionSetFeatures* instruction_set_features ATTRIBUTE_UNUSED) { + const InstructionSetFeatures* instruction_set_features) { +#ifndef ART_ENABLE_CODEGEN_mips64 + UNUSED(instruction_set_features); +#endif + switch (instruction_set) { #ifdef ART_ENABLE_CODEGEN_arm64 case kArm64: @@ -92,7 +96,11 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create( #endif #ifdef ART_ENABLE_CODEGEN_mips64 case kMips64: - return MacroAsm64UniquePtr(new (arena) mips64::Mips64Assembler(arena)); + return MacroAsm64UniquePtr(new (arena) mips64::Mips64Assembler( + arena, + instruction_set_features != nullptr + ? 
instruction_set_features->AsMips64InstructionSetFeatures() + : nullptr)); #endif #ifdef ART_ENABLE_CODEGEN_x86_64 case kX86_64: diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 5e83e825ed..2e2231b07d 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -3475,8 +3475,8 @@ void MipsAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberO CHECK(dest.IsCoreRegister() && base.AsMips().IsCoreRegister()); LoadFromOffset(kLoadWord, dest.AsCoreRegister(), base.AsMips().AsCoreRegister(), offs.Int32Value()); - if (kPoisonHeapReferences && unpoison_reference) { - Subu(dest.AsCoreRegister(), ZERO, dest.AsCoreRegister()); + if (unpoison_reference) { + MaybeUnpoisonHeapReference(dest.AsCoreRegister()); } } diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 2fca185ec3..1a5a23d10b 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -501,8 +501,10 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi bool is_float = false); private: + // This will be used as an argument for loads/stores + // when there is no need for implicit null checks. struct NoImplicitNullChecker { - void operator()() {} + void operator()() const {} }; public: @@ -727,6 +729,38 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void Pop(Register rd); void PopAndReturn(Register rd, Register rt); + // + // Heap poisoning. + // + + // Poison a heap reference contained in `src` and store it in `dst`. + void PoisonHeapReference(Register dst, Register src) { + // dst = -src. + Subu(dst, ZERO, src); + } + // Poison a heap reference contained in `reg`. + void PoisonHeapReference(Register reg) { + // reg = -reg. + PoisonHeapReference(reg, reg); + } + // Unpoison a heap reference contained in `reg`. + void UnpoisonHeapReference(Register reg) { + // reg = -reg. + Subu(reg, ZERO, reg); + } + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(Register reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } + } + // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. 
+ void MaybeUnpoisonHeapReference(Register reg) { + if (kPoisonHeapReferences) { + UnpoisonHeapReference(reg); + } + } + void Bind(Label* label) OVERRIDE { Bind(down_cast<MipsLabel*>(label)); } diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 998f2c709b..0cff44d830 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -184,6 +184,122 @@ void Mips64Assembler::EmitFI(int opcode, int fmt, FpuRegister ft, uint16_t imm) Emit(encoding); } +void Mips64Assembler::EmitMsa3R(int operation, + int df, + VectorRegister wt, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(wt, kNoVectorRegister); + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df << kDfShift | + static_cast<uint32_t>(wt) << kWtShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaBIT(int operation, + int df_m, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df_m << kDfMShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaELM(int operation, + int df_n, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaELMOperationShift | + df_n << kDfNShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaMI10(int s10, + GpuRegister rs, + VectorRegister wd, + int minor_opcode, + int df) { + CHECK_NE(rs, kNoGpuRegister); + CHECK_NE(wd, kNoVectorRegister); + CHECK(IsUint<10>(s10)) << s10; + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + s10 << kS10Shift | + static_cast<uint32_t>(rs) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode << kS10MinorShift | + df; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaI10(int operation, + int df, + int i10, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(wd, kNoVectorRegister); + CHECK(IsUint<10>(i10)) << i10; + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df << kDfShift | + i10 << kI10Shift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsa2R(int operation, + int df, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsa2ROperationShift | + df << kDf2RShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsa2RF(int operation, + int df, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = 
static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsa2RFOperationShift | + df << kDf2RShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + void Mips64Assembler::Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, rd, 0, 0x21); } @@ -488,6 +604,11 @@ void Mips64Assembler::Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0xf, rs, rt, imm16); } +void Mips64Assembler::Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + CHECK_NE(rs, ZERO); + EmitI(0x1d, rs, rt, imm16); +} + void Mips64Assembler::Dahi(GpuRegister rs, uint16_t imm16) { EmitI(1, rs, static_cast<GpuRegister>(6), imm16); } @@ -1075,6 +1196,485 @@ void Mips64Assembler::Not(GpuRegister rd, GpuRegister rs) { Nor(rd, rs, ZERO); } +void Mips64Assembler::AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xe); +} + +void Mips64Assembler::AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xe); +} + +void Mips64Assembler::AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xe); +} + +void Mips64Assembler::AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xe); +} + +void Mips64Assembler::MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x1, wt, 
ws, wd, 0x12); +} + +void Mips64Assembler::Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::Ffint_sW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e); +} + +void 
Mips64Assembler::Ffint_sD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2RF(0x19e, 0x1, ws, wd, 0x1e); +} + +void Mips64Assembler::Ftint_sW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2RF(0x19c, 0x0, ws, wd, 0x1e); +} + +void Mips64Assembler::Ftint_sD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2RF(0x19c, 0x1, ws, wd, 0x1e); +} + +void Mips64Assembler::SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SlliB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(HasMsa()); + CHECK(IsUint<3>(shamt3)) << shamt3; + EmitMsaBIT(0x0, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); +} + +void Mips64Assembler::SlliH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(HasMsa()); + CHECK(IsUint<4>(shamt4)) << shamt4; + EmitMsaBIT(0x0, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SlliW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(HasMsa()); + CHECK(IsUint<5>(shamt5)) << shamt5; + EmitMsaBIT(0x0, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SlliD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(HasMsa()); + CHECK(IsUint<6>(shamt6)) << shamt6; + EmitMsaBIT(0x0, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(HasMsa()); + CHECK(IsUint<3>(shamt3)) << shamt3; + EmitMsaBIT(0x1, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(HasMsa()); + CHECK(IsUint<4>(shamt4)) << shamt4; + EmitMsaBIT(0x1, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(HasMsa()); + 
CHECK(IsUint<5>(shamt5)) << shamt5; + EmitMsaBIT(0x1, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(HasMsa()); + CHECK(IsUint<6>(shamt6)) << shamt6; + EmitMsaBIT(0x1, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(HasMsa()); + CHECK(IsUint<3>(shamt3)) << shamt3; + EmitMsaBIT(0x2, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(HasMsa()); + CHECK(IsUint<4>(shamt4)) << shamt4; + EmitMsaBIT(0x2, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(HasMsa()); + CHECK(IsUint<5>(shamt5)) << shamt5; + EmitMsaBIT(0x2, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(HasMsa()); + CHECK(IsUint<6>(shamt6)) << shamt6; + EmitMsaBIT(0x2, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); +} + +void Mips64Assembler::MoveV(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsaBIT(0x1, 0x3e, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiB(VectorRegister wd, VectorRegister ws, int n4) { + CHECK(HasMsa()); + CHECK(IsUint<4>(n4)) << n4; + EmitMsaELM(0x1, n4 | kMsaDfNByteMask, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiH(VectorRegister wd, VectorRegister ws, int n3) { + CHECK(HasMsa()); + CHECK(IsUint<3>(n3)) << n3; + EmitMsaELM(0x1, n3 | kMsaDfNHalfwordMask, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiW(VectorRegister wd, VectorRegister ws, int n2) { + CHECK(HasMsa()); + CHECK(IsUint<2>(n2)) << n2; + EmitMsaELM(0x1, n2 | kMsaDfNWordMask, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiD(VectorRegister wd, VectorRegister ws, int n1) { + CHECK(HasMsa()); + CHECK(IsUint<1>(n1)) << n1; + EmitMsaELM(0x1, n1 | kMsaDfNDoublewordMask, ws, wd, 0x19); +} + +void Mips64Assembler::FillB(VectorRegister wd, GpuRegister rs) { + CHECK(HasMsa()); + EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::FillH(VectorRegister wd, GpuRegister rs) { + CHECK(HasMsa()); + EmitMsa2R(0xc0, 0x1, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::FillW(VectorRegister wd, GpuRegister rs) { + CHECK(HasMsa()); + EmitMsa2R(0xc0, 0x2, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::FillD(VectorRegister wd, GpuRegister rs) { + CHECK(HasMsa()); + EmitMsa2R(0xc0, 0x3, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::LdiB(VectorRegister wd, int imm8) { + CHECK(HasMsa()); + CHECK(IsInt<8>(imm8)) << imm8; + EmitMsaI10(0x6, 0x0, imm8 & kMsaS10Mask, wd, 0x7); +} + +void Mips64Assembler::LdiH(VectorRegister wd, int imm10) { + CHECK(HasMsa()); + CHECK(IsInt<10>(imm10)) << imm10; + EmitMsaI10(0x6, 0x1, imm10 & kMsaS10Mask, wd, 0x7); +} + +void Mips64Assembler::LdiW(VectorRegister wd, int imm10) { + CHECK(HasMsa()); + CHECK(IsInt<10>(imm10)) << imm10; + EmitMsaI10(0x6, 0x2, imm10 & kMsaS10Mask, wd, 0x7); +} + +void Mips64Assembler::LdiD(VectorRegister wd, int imm10) { + CHECK(HasMsa()); + CHECK(IsInt<10>(imm10)) << imm10; + EmitMsaI10(0x6, 0x3, imm10 & kMsaS10Mask, wd, 0x7); +} + +void Mips64Assembler::LdB(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<10>(offset)) << offset; + EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x8, 0x0); +} + +void 
Mips64Assembler::LdH(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<11>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64HalfwordSize); + EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x8, 0x1); +} + +void Mips64Assembler::LdW(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<12>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64WordSize); + EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x8, 0x2); +} + +void Mips64Assembler::LdD(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<13>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64DoublewordSize); + EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x8, 0x3); +} + +void Mips64Assembler::StB(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<10>(offset)) << offset; + EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x9, 0x0); +} + +void Mips64Assembler::StH(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<11>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64HalfwordSize); + EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x9, 0x1); +} + +void Mips64Assembler::StW(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<12>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64WordSize); + EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x9, 0x2); +} + +void Mips64Assembler::StD(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<13>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64DoublewordSize); + EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3); +} + void Mips64Assembler::LoadConst32(GpuRegister rd, int32_t value) { TemplateLoadConst32(this, rd, value); } @@ -1101,6 +1701,7 @@ void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value) { } } +// TODO: don't use rtmp, use daui, dahi, dati. void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) { if (IsInt<16>(value)) { Daddiu(rt, rs, value); @@ -2015,80 +2616,103 @@ void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) { Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO); } -void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, - int32_t offset) { - if (!IsInt<16>(offset) || - (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { - LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); - Daddu(AT, AT, base); - base = AT; - offset &= (kMips64DoublewordSize - 1); +void Mips64Assembler::AdjustBaseAndOffset(GpuRegister& base, + int32_t& offset, + bool is_doubleword) { + // This method is used to adjust the base register and offset pair + // for a load/store when the offset doesn't fit into int16_t. + // It is assumed that `base + offset` is sufficiently aligned for memory + // operands that are machine word in size or smaller. For doubleword-sized + // operands it's assumed that `base` is a multiple of 8, while `offset` + // may be a multiple of 4 (e.g. 4-byte-aligned long and double arguments + // and spilled variables on the stack accessed relative to the stack + // pointer register). + // We preserve the "alignment" of `offset` by adjusting it by a multiple of 8. + CHECK_NE(base, AT); // Must not overwrite the register `base` while loading `offset`. 
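The high/low decomposition performed a few lines below can be sanity-checked on the host. A minimal sketch of the daui-style split (helper names are illustrative; offsets are kept well below +2^31, so the dahi carry the patch emits for the overflow_hi16 case is not needed here):

#include <cassert>
#include <cstdint>

struct Split { int16_t high; int16_t low; };

// Decompose `offset` so that (high << 16) + low == offset with both halves
// signed 16-bit, mirroring daui (which adds high << 16 to the base) followed
// by the signed 16-bit offset of the load/store instruction itself.
Split SplitOffset(int32_t offset) {
  int16_t low = static_cast<int16_t>(offset);
  int16_t high = static_cast<int16_t>(offset >> 16);
  if (low < 0) {
    ++high;  // a negative low half borrows 1 from the high half
  }
  return {high, low};
}

int main() {
  Split s = SplitOffset(0x12348000);  // low half 0x8000 reads back as -32768
  assert(s.high == 0x1235 && s.low == -32768);
  assert((static_cast<int32_t>(s.high) << 16) + s.low == 0x12348000);
}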
+ + bool doubleword_aligned = IsAligned<kMips64DoublewordSize>(offset); + bool two_accesses = is_doubleword && !doubleword_aligned; + + // IsInt<16> must be passed a signed value, hence the static cast below. + if (IsInt<16>(offset) && + (!two_accesses || IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { + // Nothing to do: `offset` (and, if needed, `offset + 4`) fits into int16_t. + return; } - switch (type) { - case kLoadSignedByte: - Lb(reg, base, offset); - break; - case kLoadUnsignedByte: - Lbu(reg, base, offset); - break; - case kLoadSignedHalfword: - Lh(reg, base, offset); - break; - case kLoadUnsignedHalfword: - Lhu(reg, base, offset); - break; - case kLoadWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Lw(reg, base, offset); - break; - case kLoadUnsignedWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Lwu(reg, base, offset); - break; - case kLoadDoubleword: - if (!IsAligned<kMips64DoublewordSize>(offset)) { - CHECK_ALIGNED(offset, kMips64WordSize); - Lwu(reg, base, offset); - Lwu(TMP2, base, offset + kMips64WordSize); - Dinsu(reg, TMP2, 32, 32); - } else { - Ld(reg, base, offset); - } - break; - } -} + // Remember the "(mis)alignment" of `offset`, it will be checked at the end. + uint32_t misalignment = offset & (kMips64DoublewordSize - 1); + + // First, see if `offset` can be represented as a sum of two 16-bit signed + // offsets. This can save an instruction. + // To simplify matters, only do this for a symmetric range of offsets from + // about -64KB to about +64KB, allowing further addition of 4 when accessing + // 64-bit variables with two 32-bit accesses. + constexpr int32_t kMinOffsetForSimpleAdjustment = 0x7ff8; // Max int16_t that's a multiple of 8. + constexpr int32_t kMaxOffsetForSimpleAdjustment = 2 * kMinOffsetForSimpleAdjustment; + + if (0 <= offset && offset <= kMaxOffsetForSimpleAdjustment) { + Daddiu(AT, base, kMinOffsetForSimpleAdjustment); + offset -= kMinOffsetForSimpleAdjustment; + } else if (-kMaxOffsetForSimpleAdjustment <= offset && offset < 0) { + Daddiu(AT, base, -kMinOffsetForSimpleAdjustment); + offset += kMinOffsetForSimpleAdjustment; + } else { + // In more complex cases take advantage of the daui instruction, e.g.: + // daui AT, base, offset_high + // [dahi AT, 1] // When `offset` is close to +2GB. + // lw reg_lo, offset_low(AT) + // [lw reg_hi, (offset_low+4)(AT)] // If misaligned 64-bit load. + // or when offset_low+4 overflows int16_t: + // daui AT, base, offset_high + // daddiu AT, AT, 8 + // lw reg_lo, (offset_low-8)(AT) + // lw reg_hi, (offset_low-4)(AT) + int16_t offset_low = Low16Bits(offset); + int32_t offset_low32 = offset_low; + int16_t offset_high = High16Bits(offset); + bool increment_hi16 = offset_low < 0; + bool overflow_hi16 = false; + + if (increment_hi16) { + offset_high++; + overflow_hi16 = (offset_high == -32768); + } + Daui(AT, base, offset_high); -void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, - int32_t offset) { - if (!IsInt<16>(offset) || - (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { - LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); - Daddu(AT, AT, base); - base = AT; - offset &= (kMips64DoublewordSize - 1); + if (overflow_hi16) { + Dahi(AT, 1); + } + + if (two_accesses && !IsInt<16>(static_cast<int32_t>(offset_low32 + kMips64WordSize))) { + // Avoid overflow in the 16-bit offset of the load/store instruction when adding 4. 
+ Daddiu(AT, AT, kMips64DoublewordSize); + offset_low32 -= kMips64DoublewordSize; + } + + offset = offset_low32; } + base = AT; - switch (type) { - case kLoadWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Lwc1(reg, base, offset); - break; - case kLoadDoubleword: - if (!IsAligned<kMips64DoublewordSize>(offset)) { - CHECK_ALIGNED(offset, kMips64WordSize); - Lwc1(reg, base, offset); - Lw(TMP2, base, offset + kMips64WordSize); - Mthc1(TMP2, reg); - } else { - Ldc1(reg, base, offset); - } - break; - default: - LOG(FATAL) << "UNREACHABLE"; + CHECK(IsInt<16>(offset)); + if (two_accesses) { + CHECK(IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize))); } + CHECK_EQ(misalignment, offset & (kMips64DoublewordSize - 1)); +} + +void Mips64Assembler::LoadFromOffset(LoadOperandType type, + GpuRegister reg, + GpuRegister base, + int32_t offset) { + LoadFromOffset<>(type, reg, base, offset); +} + +void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, + FpuRegister reg, + GpuRegister base, + int32_t offset) { + LoadFpuFromOffset<>(type, reg, base, offset); } void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, @@ -2118,72 +2742,18 @@ void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, } } -void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, +void Mips64Assembler::StoreToOffset(StoreOperandType type, + GpuRegister reg, + GpuRegister base, int32_t offset) { - if (!IsInt<16>(offset) || - (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { - LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); - Daddu(AT, AT, base); - base = AT; - offset &= (kMips64DoublewordSize - 1); - } - - switch (type) { - case kStoreByte: - Sb(reg, base, offset); - break; - case kStoreHalfword: - Sh(reg, base, offset); - break; - case kStoreWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Sw(reg, base, offset); - break; - case kStoreDoubleword: - if (!IsAligned<kMips64DoublewordSize>(offset)) { - CHECK_ALIGNED(offset, kMips64WordSize); - Sw(reg, base, offset); - Dsrl32(TMP2, reg, 0); - Sw(TMP2, base, offset + kMips64WordSize); - } else { - Sd(reg, base, offset); - } - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } + StoreToOffset<>(type, reg, base, offset); } -void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base, +void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, + FpuRegister reg, + GpuRegister base, int32_t offset) { - if (!IsInt<16>(offset) || - (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { - LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); - Daddu(AT, AT, base); - base = AT; - offset &= (kMips64DoublewordSize - 1); - } - - switch (type) { - case kStoreWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Swc1(reg, base, offset); - break; - case kStoreDoubleword: - if (!IsAligned<kMips64DoublewordSize>(offset)) { - CHECK_ALIGNED(offset, kMips64WordSize); - Mfhc1(TMP2, reg); - Swc1(reg, base, offset); - Sw(TMP2, base, offset + kMips64WordSize); - } else { - Sdc1(reg, base, offset); - } - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } + StoreFpuToOffset<>(type, reg, base, offset); } static dwarf::Reg DWARFReg(GpuRegister reg) { @@ -2367,12 +2937,8 @@ void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, Membe CHECK(dest.IsGpuRegister() 
&& base.AsMips64().IsGpuRegister()); LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(), base.AsMips64().AsGpuRegister(), offs.Int32Value()); - if (kPoisonHeapReferences && unpoison_reference) { - // TODO: review - // Negate the 32-bit ref - Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister()); - // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64 - Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 32); + if (unpoison_reference) { + MaybeUnpoisonHeapReference(dest.AsGpuRegister()); } } diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index a0a1db634d..666c6935a1 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -21,6 +21,7 @@ #include <utility> #include <vector> +#include "arch/mips64/instruction_set_features_mips64.h" #include "base/arena_containers.h" #include "base/enums.h" #include "base/macros.h" @@ -266,6 +267,7 @@ void TemplateLoadConst64(Asm* a, Rtype rd, Vtype value) { } } +static constexpr size_t kMips64HalfwordSize = 2; static constexpr size_t kMips64WordSize = 4; static constexpr size_t kMips64DoublewordSize = 8; @@ -412,7 +414,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer public: using JNIBase = JNIMacroAssembler<PointerSize::k64>; - explicit Mips64Assembler(ArenaAllocator* arena) + explicit Mips64Assembler(ArenaAllocator* arena, + const Mips64InstructionSetFeatures* instruction_set_features = nullptr) : Assembler(arena), overwriting_(false), overwrite_location_(0), @@ -421,7 +424,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer jump_tables_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0), last_old_position_(0), - last_branch_id_(0) { + last_branch_id_(0), + has_msa_(instruction_set_features != nullptr ? instruction_set_features->HasMsa() : false) { cfi().DelayEmittingAdvancePCs(); } @@ -512,6 +516,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Ldpc(GpuRegister rs, uint32_t imm18); // MIPS64 void Lui(GpuRegister rt, uint16_t imm16); void Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16); + void Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64 void Dahi(GpuRegister rs, uint16_t imm16); // MIPS64 void Dati(GpuRegister rs, uint16_t imm16); // MIPS64 void Sync(uint32_t stype); @@ -643,6 +648,105 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Clear(GpuRegister rd); void Not(GpuRegister rd, GpuRegister rs); + // MSA instructions. 
+ void AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void Ffint_sW(VectorRegister wd, VectorRegister ws); + void Ffint_sD(VectorRegister wd, VectorRegister ws); + void Ftint_sW(VectorRegister wd, VectorRegister ws); + void Ftint_sD(VectorRegister wd, VectorRegister ws); + + void SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraW(VectorRegister wd, VectorRegister ws, 
VectorRegister wt); + void SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + // Immediate shift instructions, where shamtN denotes shift amount (must be between 0 and 2^N-1). + void SlliB(VectorRegister wd, VectorRegister ws, int shamt3); + void SlliH(VectorRegister wd, VectorRegister ws, int shamt4); + void SlliW(VectorRegister wd, VectorRegister ws, int shamt5); + void SlliD(VectorRegister wd, VectorRegister ws, int shamt6); + void SraiB(VectorRegister wd, VectorRegister ws, int shamt3); + void SraiH(VectorRegister wd, VectorRegister ws, int shamt4); + void SraiW(VectorRegister wd, VectorRegister ws, int shamt5); + void SraiD(VectorRegister wd, VectorRegister ws, int shamt6); + void SrliB(VectorRegister wd, VectorRegister ws, int shamt3); + void SrliH(VectorRegister wd, VectorRegister ws, int shamt4); + void SrliW(VectorRegister wd, VectorRegister ws, int shamt5); + void SrliD(VectorRegister wd, VectorRegister ws, int shamt6); + + void MoveV(VectorRegister wd, VectorRegister ws); + void SplatiB(VectorRegister wd, VectorRegister ws, int n4); + void SplatiH(VectorRegister wd, VectorRegister ws, int n3); + void SplatiW(VectorRegister wd, VectorRegister ws, int n2); + void SplatiD(VectorRegister wd, VectorRegister ws, int n1); + void FillB(VectorRegister wd, GpuRegister rs); + void FillH(VectorRegister wd, GpuRegister rs); + void FillW(VectorRegister wd, GpuRegister rs); + void FillD(VectorRegister wd, GpuRegister rs); + + void LdiB(VectorRegister wd, int imm8); + void LdiH(VectorRegister wd, int imm10); + void LdiW(VectorRegister wd, int imm10); + void LdiD(VectorRegister wd, int imm10); + void LdB(VectorRegister wd, GpuRegister rs, int offset); + void LdH(VectorRegister wd, GpuRegister rs, int offset); + void LdW(VectorRegister wd, GpuRegister rs, int offset); + void LdD(VectorRegister wd, GpuRegister rs, int offset); + void StB(VectorRegister wd, GpuRegister rs, int offset); + void StH(VectorRegister wd, GpuRegister rs, int offset); + void StW(VectorRegister wd, GpuRegister rs, int offset); + void StD(VectorRegister wd, GpuRegister rs, int offset); + // Higher level composite instructions. int InstrCountForLoadReplicatedConst32(int64_t); void LoadConst32(GpuRegister rd, int32_t value); @@ -654,6 +758,44 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value); void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64 + // + // Heap poisoning. + // + + // Poison a heap reference contained in `src` and store it in `dst`. + void PoisonHeapReference(GpuRegister dst, GpuRegister src) { + // dst = -src. + // Negate the 32-bit ref. + Dsubu(dst, ZERO, src); + // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64. + Dext(dst, dst, 0, 32); + } + // Poison a heap reference contained in `reg`. + void PoisonHeapReference(GpuRegister reg) { + // reg = -reg. + PoisonHeapReference(reg, reg); + } + // Unpoison a heap reference contained in `reg`. + void UnpoisonHeapReference(GpuRegister reg) { + // reg = -reg. + // Negate the 32-bit ref. + Dsubu(reg, ZERO, reg); + // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64. 
+ Dext(reg, reg, 0, 32); + } + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(GpuRegister reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } + } + // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybeUnpoisonHeapReference(GpuRegister reg) { + if (kPoisonHeapReferences) { + UnpoisonHeapReference(reg); + } + } + void Bind(Label* label) OVERRIDE { Bind(down_cast<Mips64Label*>(label)); } @@ -733,6 +875,240 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Bc1nez(FpuRegister ft, Mips64Label* label); void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); + void AdjustBaseAndOffset(GpuRegister& base, int32_t& offset, bool is_doubleword); + + private: + // This will be used as an argument for loads/stores + // when there is no need for implicit null checks. + struct NoImplicitNullChecker { + void operator()() const {} + }; + + public: + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreConstToOffset(StoreOperandType type, + int64_t value, + GpuRegister base, + int32_t offset, + GpuRegister temp, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + // We permit `base` and `temp` to coincide (however, we check that neither is AT), + // in which case the `base` register may be overwritten in the process. + CHECK_NE(temp, AT); // Must not use AT as temp, so as not to overwrite the adjusted base. + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + GpuRegister reg; + // If the adjustment left `base` unchanged and equal to `temp`, we can't use `temp` + // to load and hold the value but we can use AT instead as AT hasn't been used yet. + // Otherwise, `temp` can be used for the value. And if `temp` is the same as the + // original `base` (that is, `base` prior to the adjustment), the original `base` + // register will be overwritten. 
+ if (base == temp) { + temp = AT; + } + + if (type == kStoreDoubleword && IsAligned<kMips64DoublewordSize>(offset)) { + if (value == 0) { + reg = ZERO; + } else { + reg = temp; + LoadConst64(reg, value); + } + Sd(reg, base, offset); + null_checker(); + } else { + uint32_t low = Low32Bits(value); + uint32_t high = High32Bits(value); + if (low == 0) { + reg = ZERO; + } else { + reg = temp; + LoadConst32(reg, low); + } + switch (type) { + case kStoreByte: + Sb(reg, base, offset); + break; + case kStoreHalfword: + Sh(reg, base, offset); + break; + case kStoreWord: + Sw(reg, base, offset); + break; + case kStoreDoubleword: + // not aligned to kMips64DoublewordSize + CHECK_ALIGNED(offset, kMips64WordSize); + Sw(reg, base, offset); + null_checker(); + if (high == 0) { + reg = ZERO; + } else { + reg = temp; + if (high != low) { + LoadConst32(reg, high); + } + } + Sw(reg, base, offset + kMips64WordSize); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + if (type != kStoreDoubleword) { + null_checker(); + } + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void LoadFromOffset(LoadOperandType type, + GpuRegister reg, + GpuRegister base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword)); + + switch (type) { + case kLoadSignedByte: + Lb(reg, base, offset); + break; + case kLoadUnsignedByte: + Lbu(reg, base, offset); + break; + case kLoadSignedHalfword: + Lh(reg, base, offset); + break; + case kLoadUnsignedHalfword: + Lhu(reg, base, offset); + break; + case kLoadWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Lw(reg, base, offset); + break; + case kLoadUnsignedWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Lwu(reg, base, offset); + break; + case kLoadDoubleword: + if (!IsAligned<kMips64DoublewordSize>(offset)) { + CHECK_ALIGNED(offset, kMips64WordSize); + Lwu(reg, base, offset); + null_checker(); + Lwu(TMP2, base, offset + kMips64WordSize); + Dinsu(reg, TMP2, 32, 32); + } else { + Ld(reg, base, offset); + null_checker(); + } + break; + } + if (type != kLoadDoubleword) { + null_checker(); + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void LoadFpuFromOffset(LoadOperandType type, + FpuRegister reg, + GpuRegister base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword)); + + switch (type) { + case kLoadWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Lwc1(reg, base, offset); + null_checker(); + break; + case kLoadDoubleword: + if (!IsAligned<kMips64DoublewordSize>(offset)) { + CHECK_ALIGNED(offset, kMips64WordSize); + Lwc1(reg, base, offset); + null_checker(); + Lw(TMP2, base, offset + kMips64WordSize); + Mthc1(TMP2, reg); + } else { + Ldc1(reg, base, offset); + null_checker(); + } + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreToOffset(StoreOperandType type, + GpuRegister reg, + GpuRegister base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + // Must not use AT as `reg`, so as not to overwrite the value being stored + // with the adjusted `base`. 
+ CHECK_NE(reg, AT); + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + + switch (type) { + case kStoreByte: + Sb(reg, base, offset); + break; + case kStoreHalfword: + Sh(reg, base, offset); + break; + case kStoreWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Sw(reg, base, offset); + break; + case kStoreDoubleword: + if (!IsAligned<kMips64DoublewordSize>(offset)) { + CHECK_ALIGNED(offset, kMips64WordSize); + Sw(reg, base, offset); + null_checker(); + Dsrl32(TMP2, reg, 0); + Sw(TMP2, base, offset + kMips64WordSize); + } else { + Sd(reg, base, offset); + null_checker(); + } + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + if (type != kStoreDoubleword) { + null_checker(); + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreFpuToOffset(StoreOperandType type, + FpuRegister reg, + GpuRegister base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + + switch (type) { + case kStoreWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Swc1(reg, base, offset); + null_checker(); + break; + case kStoreDoubleword: + if (!IsAligned<kMips64DoublewordSize>(offset)) { + CHECK_ALIGNED(offset, kMips64WordSize); + Mfhc1(TMP2, reg); + Swc1(reg, base, offset); + null_checker(); + Sw(TMP2, base, offset + kMips64WordSize); + } else { + Sdc1(reg, base, offset); + null_checker(); + } + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + } + void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); void LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, int32_t offset); void StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); @@ -1076,6 +1452,18 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct); void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm); void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21); + void EmitMsa3R(int operation, + int df, + VectorRegister wt, + VectorRegister ws, + VectorRegister wd, + int minor_opcode); + void EmitMsaBIT(int operation, int df_m, VectorRegister ws, VectorRegister wd, int minor_opcode); + void EmitMsaELM(int operation, int df_n, VectorRegister ws, VectorRegister wd, int minor_opcode); + void EmitMsaMI10(int s10, GpuRegister rs, VectorRegister wd, int minor_opcode, int df); + void EmitMsaI10(int operation, int df, int i10, VectorRegister wd, int minor_opcode); + void EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode); + void EmitMsa2RF(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode); void Buncond(Mips64Label* label); void Bcond(Mips64Label* label, @@ -1099,6 +1487,10 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer // Emits exception block. void EmitExceptionPoll(Mips64ExceptionSlowPath* exception); + bool HasMsa() const { + return has_msa_; + } + // List of exception blocks to generate at the end of the code cache. 
  std::vector<Mips64ExceptionSlowPath> exception_blocks_;
@@ -1122,6 +1514,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
  uint32_t last_old_position_;
  uint32_t last_branch_id_;

+  const bool has_msa_;
+
  DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
};

diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 74b8f068c1..f2e3b1610c 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -37,12 +37,17 @@ struct MIPS64CpuRegisterCompare {
class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
                                                 mips64::GpuRegister,
                                                 mips64::FpuRegister,
-                                                uint32_t> {
+                                                uint32_t,
+                                                mips64::VectorRegister> {
 public:
  typedef AssemblerTest<mips64::Mips64Assembler,
                        mips64::GpuRegister,
                        mips64::FpuRegister,
-                       uint32_t> Base;
+                       uint32_t,
+                       mips64::VectorRegister> Base;
+
+  AssemblerMIPS64Test()
+      : instruction_set_features_(Mips64InstructionSetFeatures::FromVariant("default", nullptr)) {}

 protected:
  // Get the typically used name for this architecture, e.g., aarch64, x86-64, ...
@@ -60,7 +65,7 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
  // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative
  // branches in the .text section and so they require a relocation pass (there's a relocation
  // section, .rela.text, that has the needed info to fix up the branches).
-   return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib";
+   return " -march=mips64r6 -mmsa -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib";
  }

  void Pad(std::vector<uint8_t>& data) OVERRIDE {
@@ -76,6 +81,10 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
    return " -D -bbinary -mmips:isa64r6";
  }

+  mips64::Mips64Assembler* CreateAssembler(ArenaAllocator* arena) OVERRIDE {
+    return new (arena) mips64::Mips64Assembler(arena, instruction_set_features_.get());
+  }
+
  void SetUpHelpers() OVERRIDE {
    if (registers_.size() == 0) {
      registers_.push_back(new mips64::GpuRegister(mips64::ZERO));
@@ -176,6 +185,39 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
      fp_registers_.push_back(new mips64::FpuRegister(mips64::F29));
      fp_registers_.push_back(new mips64::FpuRegister(mips64::F30));
      fp_registers_.push_back(new mips64::FpuRegister(mips64::F31));
+
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W0));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W1));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W2));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W3));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W4));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W5));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W6));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W7));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W8));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W9));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W10));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W11));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W12));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W13));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W14));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W15));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W16));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W17));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W18));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W19));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W20));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W21));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W22));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W23));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W24));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W25));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W26));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W27));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W28));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W29));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W30));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W31));
    }
  }

@@ -183,6 +225,7 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
    AssemblerTest::TearDown();
    STLDeleteElements(&registers_);
    STLDeleteElements(&fp_registers_);
+    STLDeleteElements(&vec_registers_);
  }

  std::vector<mips64::GpuRegister*> GetRegisters() OVERRIDE {
@@ -193,6 +236,10 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
    return fp_registers_;
  }

+  std::vector<mips64::VectorRegister*> GetVectorRegisters() OVERRIDE {
+    return vec_registers_;
+  }
+
  uint32_t CreateImmediate(int64_t imm_value) OVERRIDE {
    return imm_value;
  }
@@ -272,8 +319,10 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
  std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_;

  std::vector<mips64::FpuRegister*> fp_registers_;
-};
+  std::vector<mips64::VectorRegister*> vec_registers_;

+  std::unique_ptr<const Mips64InstructionSetFeatures> instruction_set_features_;
+};

TEST_F(AssemblerMIPS64Test, Toolchain) {
  EXPECT_TRUE(CheckTools());
}
@@ -1269,6 +1318,24 @@ TEST_F(AssemblerMIPS64Test, Lui) {
  DriverStr(RepeatRIb(&mips64::Mips64Assembler::Lui, 16, "lui ${reg}, {imm}"), "lui");
}

+TEST_F(AssemblerMIPS64Test, Daui) {
+  std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters();
+  std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters();
+  reg2_registers.erase(reg2_registers.begin());  // reg2 can't be ZERO, remove it.
+  std::vector<int64_t> imms = CreateImmediateValuesBits(/* imm_bits */ 16, /* as_uint */ true);
+  WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
+  std::ostringstream expected;
+  for (mips64::GpuRegister* reg1 : reg1_registers) {
+    for (mips64::GpuRegister* reg2 : reg2_registers) {
+      for (int64_t imm : imms) {
+        __ Daui(*reg1, *reg2, imm);
+        expected << "daui $" << *reg1 << ", $" << *reg2 << ", " << imm << "\n";
+      }
+    }
+  }
+  DriverStr(expected.str(), "daui");
+}
+
TEST_F(AssemblerMIPS64Test, Dahi) {
  DriverStr(RepeatRIb(&mips64::Mips64Assembler::Dahi, 16, "dahi ${reg}, ${reg}, {imm}"), "dahi");
}
@@ -1542,6 +1609,10 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -256);
  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -32768);
  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x7FFFFFFE);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x7FFFFFFF);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x80000001);

  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A0, 0);
  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0);
@@ -1556,6 +1627,10 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -256);
  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -32768);
  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x7FFFFFFE);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x7FFFFFFF);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x80000001);

  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A0, 0);
  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0);
@@ -1570,6 +1645,10 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -256);
  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -32768);
  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x7FFFFFFC);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x7FFFFFFE);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x80000002);

  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A0, 0);
  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0);
@@ -1584,6 +1663,10 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -256);
  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -32768);
  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x7FFFFFFC);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x7FFFFFFE);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x80000002);

  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A0, 0);
  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0);
@@ -1598,6 +1681,10 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -256);
  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -32768);
  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x7FFFFFF8);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x7FFFFFFC);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x80000004);

  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A0, 0);
  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0);
@@ -1612,6 +1699,10 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -256);
  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -32768);
  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x7FFFFFF8);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x7FFFFFFC);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x80000004);

  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A0, 0);
  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0);
@@ -1622,10 +1713,15 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8000);
  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8004);
  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x27FFC);
  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x12345678);
  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -256);
  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -32768);
  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x7FFFFFF8);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x7FFFFFFC);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x80000004);

  const char* expected =
      "lb $a0, 0($a0)\n"
@@ -1634,25 +1730,28 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
      "lb $a0, 0($a1)\n"
      "lb $a0, 256($a1)\n"
      "lb $a0, 1000($a1)\n"
      "lb $a0, 0x7FFF($a1)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "lb $a0, 0($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "lb $a0, 1($at)\n"
-     "lui $at, 1\n"
-     "daddu $at, $at, $a1\n"
-     "lb $a0, 0($at)\n"
-     "lui $at, 0x1234\n"
-     "ori $at, 0x5678\n"
-     "daddu $at, $at, $a1\n"
+     "daddiu $at, $a1, 0x7FF8\n"
+     "lb $a0, 8($at)\n"
+     "daddiu $at, $a1, 32760\n"
+     "lb $a0, 9($at)\n"
+     "daui $at, $a1, 1\n"
      "lb $a0, 0($at)\n"
+     "daui $at, $a1, 0x1234\n"
+     "lb $a0, 0x5678($at)\n"
      "lb $a0, -256($a1)\n"
      "lb $a0, -32768($a1)\n"
-     "lui $at, 0xABCD\n"
-     "ori $at, 0xEF00\n"
-     "daddu $at, $at, $a1\n"
+     "daui $at, $a1, 0xABCE\n"
+     "lb $a0, -4352($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "lb $a0, -2($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "lb $a0, -1($at)\n"
+     "daui $at, $a1, 32768\n"
      "lb $a0, 0($at)\n"
+     "daui $at, $a1, 32768\n"
+     "lb $a0, 1($at)\n"

      "lbu $a0, 0($a0)\n"
      "lbu $a0, 0($a1)\n"
@@ -1660,25 +1759,28 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
      "lbu $a0, 256($a1)\n"
      "lbu $a0, 1000($a1)\n"
      "lbu $a0, 0x7FFF($a1)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "lbu $a0, 0($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "lbu $a0, 1($at)\n"
-     "lui $at, 1\n"
-     "daddu $at, $at, $a1\n"
-     "lbu $a0, 0($at)\n"
-     "lui $at, 0x1234\n"
-     "ori $at, 0x5678\n"
-     "daddu $at, $at, $a1\n"
+     "daddiu $at, $a1, 0x7FF8\n"
+     "lbu $a0, 8($at)\n"
+     "daddiu $at, $a1, 32760\n"
+     "lbu $a0, 9($at)\n"
+     "daui $at, $a1, 1\n"
      "lbu $a0, 0($at)\n"
+     "daui $at, $a1, 0x1234\n"
+     "lbu $a0, 0x5678($at)\n"
      "lbu $a0, -256($a1)\n"
      "lbu $a0, -32768($a1)\n"
-     "lui $at, 0xABCD\n"
-     "ori $at, 0xEF00\n"
-     "daddu $at, $at, $a1\n"
+     "daui $at, $a1, 0xABCE\n"
+     "lbu $a0, -4352($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "lbu $a0, -2($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "lbu $a0, -1($at)\n"
+     "daui $at, $a1, 32768\n"
      "lbu $a0, 0($at)\n"
+     "daui $at, $a1, 32768\n"
+     "lbu $a0, 1($at)\n"

      "lh $a0, 0($a0)\n"
      "lh $a0, 0($a1)\n"
@@ -1686,25 +1788,28 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
      "lh $a0, 256($a1)\n"
      "lh $a0, 1000($a1)\n"
      "lh $a0, 0x7FFE($a1)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "lh $a0, 0($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "lh $a0, 2($at)\n"
-     "lui $at, 1\n"
-     "daddu $at, $at, $a1\n"
-     "lh $a0, 0($at)\n"
-     "lui $at, 0x1234\n"
-     "ori $at, 0x5678\n"
-     "daddu $at, $at, $a1\n"
+     "daddiu $at, $a1, 0x7FF8\n"
+     "lh $a0, 8($at)\n"
+     "daddiu $at, $a1, 32760\n"
+     "lh $a0, 10($at)\n"
+     "daui $at, $a1, 1\n"
      "lh $a0, 0($at)\n"
+     "daui $at, $a1, 0x1234\n"
+     "lh $a0, 0x5678($at)\n"
      "lh $a0, -256($a1)\n"
      "lh $a0, -32768($a1)\n"
-     "lui $at, 0xABCD\n"
-     "ori $at, 0xEF00\n"
-     "daddu $at, $at, $a1\n"
+     "daui $at, $a1, 0xABCE\n"
+     "lh $a0, -4352($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "lh $a0, -4($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "lh $a0, -2($at)\n"
+     "daui $at, $a1, 32768\n"
      "lh $a0, 0($at)\n"
+     "daui $at, $a1, 32768\n"
+     "lh $a0, 2($at)\n"

      "lhu $a0, 0($a0)\n"
      "lhu $a0, 0($a1)\n"
@@ -1712,25 +1817,28 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
      "lhu $a0, 256($a1)\n"
      "lhu $a0, 1000($a1)\n"
      "lhu $a0, 0x7FFE($a1)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "lhu $a0, 0($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "lhu $a0, 2($at)\n"
-     "lui $at, 1\n"
-     "daddu $at, $at, $a1\n"
-     "lhu $a0, 0($at)\n"
-     "lui $at, 0x1234\n"
-     "ori $at, 0x5678\n"
-     "daddu $at, $at, $a1\n"
+     "daddiu $at, $a1, 0x7FF8\n"
+     "lhu $a0, 8($at)\n"
+     "daddiu $at, $a1, 32760\n"
+     "lhu $a0, 10($at)\n"
+     "daui $at, $a1, 1\n"
      "lhu $a0, 0($at)\n"
+     "daui $at, $a1, 0x1234\n"
+     "lhu $a0, 0x5678($at)\n"
      "lhu $a0, -256($a1)\n"
      "lhu $a0, -32768($a1)\n"
-     "lui $at, 0xABCD\n"
-     "ori $at, 0xEF00\n"
-     "daddu $at, $at, $a1\n"
+     "daui $at, $a1, 0xABCE\n"
+     "lhu $a0, -4352($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "lhu $a0, -4($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "lhu $a0, -2($at)\n"
+     "daui $at, $a1, 32768\n"
      "lhu $a0, 0($at)\n"
+     "daui $at, $a1, 32768\n"
+     "lhu $a0, 2($at)\n"

      "lw $a0, 0($a0)\n"
      "lw $a0, 0($a1)\n"
@@ -1738,25 +1846,28 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
      "lw $a0, 256($a1)\n"
      "lw $a0, 1000($a1)\n"
      "lw $a0, 0x7FFC($a1)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "lw $a0, 0($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "lw $a0, 4($at)\n"
-     "lui $at, 1\n"
-     "daddu $at, $at, $a1\n"
-     "lw $a0, 0($at)\n"
-     "lui $at, 0x1234\n"
-     "ori $at, 0x5678\n"
-     "daddu $at, $at, $a1\n"
+     "daddiu $at, $a1, 0x7FF8\n"
+     "lw $a0, 8($at)\n"
+     "daddiu $at, $a1, 32760\n"
+     "lw $a0, 12($at)\n"
+     "daui $at, $a1, 1\n"
      "lw $a0, 0($at)\n"
+     "daui $at, $a1, 0x1234\n"
+     "lw $a0, 0x5678($at)\n"
      "lw $a0, -256($a1)\n"
      "lw $a0, -32768($a1)\n"
-     "lui $at, 0xABCD\n"
-     "ori $at, 0xEF00\n"
-     "daddu $at, $at, $a1\n"
+     "daui $at, $a1, 0xABCE\n"
+     "lw $a0, -4352($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "lw $a0, -8($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "lw $a0, -4($at)\n"
+     "daui $at, $a1, 32768\n"
      "lw $a0, 0($at)\n"
+     "daui $at, $a1, 32768\n"
+     "lw $a0, 4($at)\n"

      "lwu $a0, 0($a0)\n"
      "lwu $a0, 0($a1)\n"
@@ -1764,59 +1875,73 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) {
      "lwu $a0, 256($a1)\n"
      "lwu $a0, 1000($a1)\n"
      "lwu $a0, 0x7FFC($a1)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "lwu $a0, 0($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "lwu $a0, 4($at)\n"
-     "lui $at, 1\n"
-     "daddu $at, $at, $a1\n"
-     "lwu $a0, 0($at)\n"
-     "lui $at, 0x1234\n"
-     "ori $at, 0x5678\n"
-     "daddu $at, $at, $a1\n"
+     "daddiu $at, $a1, 0x7FF8\n"
+     "lwu $a0, 8($at)\n"
+     "daddiu $at, $a1, 32760\n"
+     "lwu $a0, 12($at)\n"
+     "daui $at, $a1, 1\n"
      "lwu $a0, 0($at)\n"
+     "daui $at, $a1, 0x1234\n"
+     "lwu $a0, 0x5678($at)\n"
      "lwu $a0, -256($a1)\n"
      "lwu $a0, -32768($a1)\n"
-     "lui $at, 0xABCD\n"
-     "ori $at, 0xEF00\n"
-     "daddu $at, $at, $a1\n"
+     "daui $at, $a1, 0xABCE\n"
+     "lwu $a0, -4352($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "lwu $a0, -8($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "lwu $a0, -4($at)\n"
+     "daui $at, $a1, 32768\n"
      "lwu $a0, 0($at)\n"
+     "daui $at, $a1, 32768\n"
+     "lwu $a0, 4($at)\n"

      "ld $a0, 0($a0)\n"
      "ld $a0, 0($a1)\n"
      "lwu $a0, 4($a1)\n"
      "lwu $t3, 8($a1)\n"
-     "dins $a0, $t3, 32, 32\n"
+     "dinsu $a0, $t3, 32, 32\n"
      "ld $a0, 256($a1)\n"
      "ld $a0, 1000($a1)\n"
-     "ori $at, $zero, 0x7FF8\n"
-     "daddu $at, $at, $a1\n"
-     "lwu $a0, 4($at)\n"
-     "lwu $t3, 8($at)\n"
-     "dins $a0, $t3, 32, 32\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "ld $a0, 0($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
+     "daddiu $at, $a1, 32760\n"
      "lwu $a0, 4($at)\n"
      "lwu $t3, 8($at)\n"
-     "dins $a0, $t3, 32, 32\n"
-     "lui $at, 1\n"
-     "daddu $at, $at, $a1\n"
-     "ld $a0, 0($at)\n"
-     "lui $at, 0x1234\n"
-     "ori $at, 0x5678\n"
-     "daddu $at, $at, $a1\n"
+     "dinsu $a0, $t3, 32, 32\n"
+     "daddiu $at, $a1, 32760\n"
+     "ld $a0, 8($at)\n"
+     "daddiu $at, $a1, 32760\n"
+     "lwu $a0, 12($at)\n"
+     "lwu $t3, 16($at)\n"
+     "dinsu $a0, $t3, 32, 32\n"
+     "daui $at, $a1, 1\n"
      "ld $a0, 0($at)\n"
+     "daui $at, $a1, 2\n"
+     "daddiu $at, $at, 8\n"
+     "lwu $a0, 0x7ff4($at)\n"
+     "lwu $t3, 0x7ff8($at)\n"
+     "dinsu $a0, $t3, 32, 32\n"
+     "daui $at, $a1, 0x1234\n"
+     "ld $a0, 0x5678($at)\n"
      "ld $a0, -256($a1)\n"
      "ld $a0, -32768($a1)\n"
-     "lui $at, 0xABCD\n"
-     "ori $at, 0xEF00\n"
-     "daddu $at, $at, $a1\n"
-     "ld $a0, 0($at)\n";
+     "daui $at, $a1, 0xABCE\n"
+     "ld $a0, -4352($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "ld $a0, -8($at)\n"
+     "daui $at, $a1, 32768\n"
+     "dahi $at, $at, 1\n"
+     "lwu $a0, -4($at)\n"
+ "lwu $t3, 0($at)\n" + "dinsu $a0, $t3, 32, 32\n" + "daui $at, $a1, 32768\n" + "ld $a0, 0($at)\n" + "daui $at, $a1, 32768\n" + "lwu $a0, 4($at)\n" + "lwu $t3, 8($at)\n" + "dinsu $a0, $t3, 32, 32\n"; DriverStr(expected, "LoadFromOffset"); } @@ -1850,57 +1975,42 @@ TEST_F(AssemblerMIPS64Test, LoadFpuFromOffset) { "lwc1 $f0, 4($a0)\n" "lwc1 $f0, 256($a0)\n" "lwc1 $f0, 0x7FFC($a0)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a0\n" - "lwc1 $f0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a0\n" - "lwc1 $f0, 4($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a0\n" - "lwc1 $f0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a0\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" + "lwc1 $f0, 8($at)\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" + "lwc1 $f0, 12($at)\n" + "daui $at, $a0, 1\n" "lwc1 $f0, 0($at)\n" + "daui $at, $a0, 4660 # 0x1234\n" + "lwc1 $f0, 22136($at) # 0x5678\n" "lwc1 $f0, -256($a0)\n" "lwc1 $f0, -32768($a0)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a0\n" - "lwc1 $f0, 0($at)\n" + "daui $at, $a0, 0xABCE\n" + "lwc1 $f0, -0x1100($at) # 0xEF00\n" "ldc1 $f0, 0($a0)\n" "lwc1 $f0, 4($a0)\n" "lw $t3, 8($a0)\n" "mthc1 $t3, $f0\n" "ldc1 $f0, 256($a0)\n" - "ori $at, $zero, 0x7FF8\n" - "daddu $at, $at, $a0\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" "lwc1 $f0, 4($at)\n" "lw $t3, 8($at)\n" "mthc1 $t3, $f0\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a0\n" - "ldc1 $f0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a0\n" - "lwc1 $f0, 4($at)\n" - "lw $t3, 8($at)\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" + "ldc1 $f0, 8($at)\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" + "lwc1 $f0, 12($at)\n" + "lw $t3, 16($at)\n" "mthc1 $t3, $f0\n" - "lui $at, 1\n" - "daddu $at, $at, $a0\n" - "ldc1 $f0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a0\n" + "daui $at, $a0, 1\n" "ldc1 $f0, 0($at)\n" + "daui $at, $a0, 4660 # 0x1234\n" + "ldc1 $f0, 22136($at) # 0x5678\n" "ldc1 $f0, -256($a0)\n" "ldc1 $f0, -32768($a0)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a0\n" - "ldc1 $f0, 0($at)\n"; + "daui $at, $a0, 0xABCE\n" + "ldc1 $f0, -0x1100($at) # 0xEF00\n"; DriverStr(expected, "LoadFpuFromOffset"); } @@ -1960,6 +2070,10 @@ TEST_F(AssemblerMIPS64Test, StoreToOffset) { __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -256); __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -32768); __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0xABCDEF00); + __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x7FFFFFF8); + __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x7FFFFFFC); + __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x80000000); + __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x80000004); const char* expected = "sb $a0, 0($a0)\n" @@ -1968,25 +2082,18 @@ TEST_F(AssemblerMIPS64Test, StoreToOffset) { "sb $a0, 256($a1)\n" "sb $a0, 1000($a1)\n" "sb $a0, 0x7FFF($a1)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "sb $a0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "sb $a0, 1($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a1\n" - "sb $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a1\n" + "daddiu $at, $a1, 0x7FF8\n" + "sb $a0, 8($at)\n" + "daddiu $at, $a1, 0x7FF8\n" + "sb $a0, 9($at)\n" + "daui $at, $a1, 1\n" "sb $a0, 0($at)\n" + "daui $at, $a1, 4660 # 0x1234\n" + "sb $a0, 22136($at) # 0x5678\n" "sb $a0, -256($a1)\n" "sb 
-     "lui $at, 0xABCD\n"
-     "ori $at, 0xEF00\n"
-     "daddu $at, $at, $a1\n"
-     "sb $a0, 0($at)\n"
+     "daui $at, $a1, 43982 # 0xABCE\n"
+     "sb $a0, -4352($at) # 0xEF00\n"

      "sh $a0, 0($a0)\n"
      "sh $a0, 0($a1)\n"
@@ -1994,25 +2101,18 @@ TEST_F(AssemblerMIPS64Test, StoreToOffset) {
      "sh $a0, 256($a1)\n"
      "sh $a0, 1000($a1)\n"
      "sh $a0, 0x7FFE($a1)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "sh $a0, 0($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "sh $a0, 2($at)\n"
-     "lui $at, 1\n"
-     "daddu $at, $at, $a1\n"
-     "sh $a0, 0($at)\n"
-     "lui $at, 0x1234\n"
-     "ori $at, 0x5678\n"
-     "daddu $at, $at, $a1\n"
+     "daddiu $at, $a1, 0x7FF8\n"
+     "sh $a0, 8($at)\n"
+     "daddiu $at, $a1, 0x7FF8\n"
+     "sh $a0, 10($at)\n"
+     "daui $at, $a1, 1\n"
      "sh $a0, 0($at)\n"
+     "daui $at, $a1, 4660 # 0x1234\n"
+     "sh $a0, 22136($at) # 0x5678\n"
      "sh $a0, -256($a1)\n"
      "sh $a0, -32768($a1)\n"
-     "lui $at, 0xABCD\n"
-     "ori $at, 0xEF00\n"
-     "daddu $at, $at, $a1\n"
-     "sh $a0, 0($at)\n"
+     "daui $at, $a1, 43982 # 0xABCE\n"
+     "sh $a0, -4352($at) # 0xEF00\n"

      "sw $a0, 0($a0)\n"
      "sw $a0, 0($a1)\n"
@@ -2020,25 +2120,18 @@ TEST_F(AssemblerMIPS64Test, StoreToOffset) {
      "sw $a0, 256($a1)\n"
      "sw $a0, 1000($a1)\n"
      "sw $a0, 0x7FFC($a1)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "sw $a0, 0($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "sw $a0, 4($at)\n"
-     "lui $at, 1\n"
-     "daddu $at, $at, $a1\n"
-     "sw $a0, 0($at)\n"
-     "lui $at, 0x1234\n"
-     "ori $at, 0x5678\n"
-     "daddu $at, $at, $a1\n"
+     "daddiu $at, $a1, 0x7FF8\n"
+     "sw $a0, 8($at)\n"
+     "daddiu $at, $a1, 0x7FF8\n"
+     "sw $a0, 12($at)\n"
+     "daui $at, $a1, 1\n"
      "sw $a0, 0($at)\n"
+     "daui $at, $a1, 4660 # 0x1234\n"
+     "sw $a0, 22136($at) # 0x5678\n"
      "sw $a0, -256($a1)\n"
      "sw $a0, -32768($a1)\n"
-     "lui $at, 0xABCD\n"
-     "ori $at, 0xEF00\n"
-     "daddu $at, $at, $a1\n"
-     "sw $a0, 0($at)\n"
+     "daui $at, $a1, 43982 # 0xABCE\n"
+     "sw $a0, -4352($at) # 0xEF00\n"

      "sd $a0, 0($a0)\n"
      "sd $a0, 0($a1)\n"
@@ -2047,32 +2140,38 @@ TEST_F(AssemblerMIPS64Test, StoreToOffset) {
      "sw $t3, 8($a1)\n"
      "sd $a0, 256($a1)\n"
      "sd $a0, 1000($a1)\n"
-     "ori $at, $zero, 0x7FF8\n"
-     "daddu $at, $at, $a1\n"
      "sw $a0, 4($at)\n"
      "dsrl32 $t3, $a0, 0\n"
      "sw $t3, 8($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "sd $a0, 0($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a1\n"
-     "sw $a0, 4($at)\n"
+     "daddiu $at, $a1, 32760 # 0x7FF8\n"
+     "sd $a0, 8($at)\n"
+     "daddiu $at, $a1, 32760 # 0x7FF8\n"
+     "sw $a0, 12($at)\n"
      "dsrl32 $t3, $a0, 0\n"
-     "sw $t3, 8($at)\n"
-     "lui $at, 1\n"
-     "daddu $at, $at, $a1\n"
-     "sd $a0, 0($at)\n"
-     "lui $at, 0x1234\n"
-     "ori $at, 0x5678\n"
-     "daddu $at, $at, $a1\n"
+     "sw $t3, 16($at)\n"
+     "daui $at, $a1, 1\n"
      "sd $a0, 0($at)\n"
+     "daui $at, $a1, 4660 # 0x1234\n"
+     "sd $a0, 22136($at) # 0x5678\n"
      "sd $a0, -256($a1)\n"
      "sd $a0, -32768($a1)\n"
-     "lui $at, 0xABCD\n"
-     "ori $at, 0xEF00\n"
-     "daddu $at, $at, $a1\n"
-     "sd $a0, 0($at)\n";
+     "daui $at, $a1, 0xABCE\n"
+     "sd $a0, -0x1100($at)\n"
+     "daui $at, $a1, 0x8000\n"
+     "dahi $at, $at, 1\n"
+     "sd $a0, -8($at)\n"
+     "daui $at, $a1, 0x8000\n"
+     "dahi $at, $at, 1\n"
+     "sw $a0, -4($at) # 0xFFFC\n"
+     "dsrl32 $t3, $a0, 0\n"
+     "sw $t3, 0($at) # 0x0\n"
+     "daui $at, $a1, 0x8000\n"
+     "sd $a0, 0($at) # 0x0\n"
+     "daui $at, $a1, 0x8000\n"
+     "sw $a0, 4($at) # 0x4\n"
+     "dsrl32 $t3, $a0, 0\n"
+     "sw $t3, 8($at) # 0x8\n";
  DriverStr(expected, "StoreToOffset");
}
@@ -2106,60 +2205,115 @@ TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) {
      "swc1 $f0, 4($a0)\n"
      "swc1 $f0, 256($a0)\n"
      "swc1 $f0, 0x7FFC($a0)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a0\n"
-     "swc1 $f0, 0($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a0\n"
-     "swc1 $f0, 4($at)\n"
-     "lui $at, 1\n"
-     "daddu $at, $at, $a0\n"
-     "swc1 $f0, 0($at)\n"
-     "lui $at, 0x1234\n"
-     "ori $at, 0x5678\n"
-     "daddu $at, $at, $a0\n"
+     "daddiu $at, $a0, 32760 # 0x7FF8\n"
+     "swc1 $f0, 8($at)\n"
+     "daddiu $at, $a0, 32760 # 0x7FF8\n"
+     "swc1 $f0, 12($at)\n"
+     "daui $at, $a0, 1\n"
      "swc1 $f0, 0($at)\n"
+     "daui $at, $a0, 4660 # 0x1234\n"
+     "swc1 $f0, 22136($at) # 0x5678\n"
      "swc1 $f0, -256($a0)\n"
      "swc1 $f0, -32768($a0)\n"
-     "lui $at, 0xABCD\n"
-     "ori $at, 0xEF00\n"
-     "daddu $at, $at, $a0\n"
-     "swc1 $f0, 0($at)\n"
+     "daui $at, $a0, 0xABCE\n"
+     "swc1 $f0, -0x1100($at)\n"

      "sdc1 $f0, 0($a0)\n"
      "mfhc1 $t3, $f0\n"
      "swc1 $f0, 4($a0)\n"
      "sw $t3, 8($a0)\n"
      "sdc1 $f0, 256($a0)\n"
-     "ori $at, $zero, 0x7FF8\n"
-     "daddu $at, $at, $a0\n"
+     "daddiu $at, $a0, 32760 # 0x7FF8\n"
      "mfhc1 $t3, $f0\n"
      "swc1 $f0, 4($at)\n"
      "sw $t3, 8($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a0\n"
-     "sdc1 $f0, 0($at)\n"
-     "ori $at, $zero, 0x8000\n"
-     "daddu $at, $at, $a0\n"
+     "daddiu $at, $a0, 32760 # 0x7FF8\n"
+     "sdc1 $f0, 8($at)\n"
+     "daddiu $at, $a0, 32760 # 0x7FF8\n"
      "mfhc1 $t3, $f0\n"
-     "swc1 $f0, 4($at)\n"
-     "sw $t3, 8($at)\n"
-     "lui $at, 1\n"
-     "daddu $at, $at, $a0\n"
-     "sdc1 $f0, 0($at)\n"
-     "lui $at, 0x1234\n"
-     "ori $at, 0x5678\n"
-     "daddu $at, $at, $a0\n"
+     "swc1 $f0, 12($at)\n"
+     "sw $t3, 16($at)\n"
+     "daui $at, $a0, 1\n"
      "sdc1 $f0, 0($at)\n"
+     "daui $at, $a0, 4660 # 0x1234\n"
+     "sdc1 $f0, 22136($at) # 0x5678\n"
      "sdc1 $f0, -256($a0)\n"
      "sdc1 $f0, -32768($a0)\n"
-     "lui $at, 0xABCD\n"
-     "ori $at, 0xEF00\n"
-     "daddu $at, $at, $a0\n"
-     "sdc1 $f0, 0($at)\n";
+     "daui $at, $a0, 0xABCE\n"
+     "sdc1 $f0, -0x1100($at)\n";
  DriverStr(expected, "StoreFpuToOffset");
}

+TEST_F(AssemblerMIPS64Test, StoreConstToOffset) {
+  __ StoreConstToOffset(mips64::kStoreByte, 0xFF, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreHalfword, 0xFFFF, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0x123456789ABCDEF0, mips64::A1, +0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreByte, 0, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreHalfword, 0, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0, mips64::A1, +0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0x1234567812345678, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0x1234567800000000, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0x0000000012345678, mips64::A1, +0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::T8, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::T8, +0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::A1, -0xFFF0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::A1, +0xFFF0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::T8, -0xFFF0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::T8, +0xFFF0, mips64::T8);
+
+  const char* expected =
+      "ori $t8, $zero, 0xFF\n"
+      "sb $t8, 0($a1)\n"
+      "ori $t8, $zero, 0xFFFF\n"
+      "sh $t8, 0($a1)\n"
+      "lui $t8, 0x1234\n"
+      "ori $t8, $t8, 0x5678\n"
+      "sw $t8, 0($a1)\n"
+      "lui $t8, 0x9abc\n"
+      "ori $t8, $t8, 0xdef0\n"
+      "dahi $t8, $t8, 0x5679\n"
+      "dati $t8, $t8, 0x1234\n"
+      "sd $t8, 0($a1)\n"
+      "sb $zero, 0($a1)\n"
+      "sh $zero, 0($a1)\n"
+      "sw $zero, 0($a1)\n"
+      "sd $zero, 0($a1)\n"
+      "lui $t8, 0x1234\n"
+      "ori $t8, $t8, 0x5678\n"
+      "dins $t8, $t8, 0x20, 0x20\n"
+      "sd $t8, 0($a1)\n"
+      "lui $t8, 0x246\n"
+      "ori $t8, $t8, 0x8acf\n"
+      "dsll32 $t8, $t8, 0x3\n"
+      "sd $t8, 0($a1)\n"
+      "lui $t8, 0x1234\n"
+      "ori $t8, $t8, 0x5678\n"
+      "sd $t8, 0($a1)\n"
+      "sw $zero, 0($t8)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, $at, 0x5678\n"
+      "sw $at, 0($t8)\n"
+      "daddiu $at, $a1, -32760 # 0x8008\n"
+      "sw $zero, -32760($at) # 0x8008\n"
+      "daddiu $at, $a1, 32760 # 0x7FF8\n"
+      "lui $t8, 4660 # 0x1234\n"
+      "ori $t8, $t8, 22136 # 0x5678\n"
+      "sw $t8, 32760($at) # 0x7FF8\n"
+      "daddiu $at, $t8, -32760 # 0x8008\n"
+      "sw $zero, -32760($at) # 0x8008\n"
+      "daddiu $at, $t8, 32760 # 0x7FF8\n"
+      "lui $t8, 4660 # 0x1234\n"
+      "ori $t8, $t8, 22136 # 0x5678\n"
+      "sw $t8, 32760($at) # 0x7FF8\n";
+  DriverStr(expected, "StoreConstToOffset");
+}

//////////////////////////////
// Loading/adding Constants //
//////////////////////////////
@@ -2356,6 +2510,386 @@ TEST_F(AssemblerMIPS64Test, LoadConst64) {
  EXPECT_EQ(tester.GetPathsCovered(), art::mips64::kLoadConst64PathAllPaths);
}

+// MSA instructions.
+
+TEST_F(AssemblerMIPS64Test, AndV) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::AndV, "and.v ${reg1}, ${reg2}, ${reg3}"), "and.v");
+}
+
+TEST_F(AssemblerMIPS64Test, OrV) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::OrV, "or.v ${reg1}, ${reg2}, ${reg3}"), "or.v");
+}
+
+TEST_F(AssemblerMIPS64Test, NorV) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::NorV, "nor.v ${reg1}, ${reg2}, ${reg3}"), "nor.v");
+}
+
+TEST_F(AssemblerMIPS64Test, XorV) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::XorV, "xor.v ${reg1}, ${reg2}, ${reg3}"), "xor.v");
+}
+
+TEST_F(AssemblerMIPS64Test, AddvB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvB, "addv.b ${reg1}, ${reg2}, ${reg3}"),
+            "addv.b");
+}
+
+TEST_F(AssemblerMIPS64Test, AddvH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvH, "addv.h ${reg1}, ${reg2}, ${reg3}"),
+            "addv.h");
+}
+
+TEST_F(AssemblerMIPS64Test, AddvW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvW, "addv.w ${reg1}, ${reg2}, ${reg3}"),
+            "addv.w");
+}
+
+TEST_F(AssemblerMIPS64Test, AddvD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvD, "addv.d ${reg1}, ${reg2}, ${reg3}"),
+            "addv.d");
+}
+
+TEST_F(AssemblerMIPS64Test, SubvB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvB, "subv.b ${reg1}, ${reg2}, ${reg3}"),
+            "subv.b");
+}
+
+TEST_F(AssemblerMIPS64Test, SubvH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvH, "subv.h ${reg1}, ${reg2}, ${reg3}"),
+            "subv.h");
+}
+
+TEST_F(AssemblerMIPS64Test, SubvW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvW, "subv.w ${reg1}, ${reg2}, ${reg3}"),
+            "subv.w");
+}
+
+TEST_F(AssemblerMIPS64Test, SubvD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvD, "subv.d ${reg1}, ${reg2}, ${reg3}"),
+            "subv.d");
+}
+
+TEST_F(AssemblerMIPS64Test, MulvB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvB, "mulv.b ${reg1}, ${reg2}, ${reg3}"),
+            "mulv.b");
+}
+
+TEST_F(AssemblerMIPS64Test, MulvH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvH, "mulv.h ${reg1}, ${reg2}, ${reg3}"),
+            "mulv.h");
+}
+
+TEST_F(AssemblerMIPS64Test, MulvW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvW, "mulv.w ${reg1}, ${reg2}, ${reg3}"),
+            "mulv.w");
+}
+
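[Editorial sketch.] Each of these three-register tests drives the new RepeatVVV() helper, which expands the format string once per register triple; the instruction words themselves come from the EmitMsa3R() routine declared earlier in assembler_mips64.h. The following standalone sketch shows how such a 3R word is plausibly packed from the shift constants added to constants_mips64.h. It is illustrative only, not the commit's verbatim implementation (that lives in assembler_mips64.cc); kOpcodeShift = 26 is an assumption based on the standard MIPS major-opcode position.

#include <cstdint>

// Pack a 3R-format MSA instruction word (sketch; field positions taken from
// the kMsa* constants in constants_mips64.h, values passed in are assumed to
// already fit their fields).
uint32_t EncodeMsa3R(uint32_t operation, uint32_t df,
                     uint32_t wt, uint32_t ws, uint32_t wd,
                     uint32_t minor_opcode) {
  const uint32_t kMsaMajorOpcode = 0x1e;  // Major opcode selecting MSA.
  const uint32_t kOpcodeShift = 26;       // Assumed: standard MIPS opcode slot.
  return (kMsaMajorOpcode << kOpcodeShift) |
         (operation << 23) |   // kMsaOperationShift: addv/subv/mulv/... group.
         (df << 21) |          // kDfShift: data format (0=b, 1=h, 2=w, 3=d).
         (wt << 16) |          // kWtShift: third source register.
         (ws << 11) |          // kWsShift: second source register.
         (wd << 6) |           // kWdShift: destination register.
         minor_opcode;         // Low bits select the 3R minor opcode.
}
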
+TEST_F(AssemblerMIPS64Test, MulvD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvD, "mulv.d ${reg1}, ${reg2}, ${reg3}"),
+            "mulv.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_sB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sB, "div_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_sH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sH, "div_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_sW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sW, "div_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_sD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sD, "div_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_uB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uB, "div_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_uH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uH, "div_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_uW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uW, "div_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_uD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uD, "div_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_sB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sB, "mod_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_sH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sH, "mod_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_sW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sW, "mod_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_sD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sD, "mod_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_uB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uB, "mod_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_uH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uH, "mod_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_uW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uW, "mod_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_uD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uD, "mod_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.d");
+}
+
+TEST_F(AssemblerMIPS64Test, FaddW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"),
+            "fadd.w");
+}
+
+TEST_F(AssemblerMIPS64Test, FaddD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddD, "fadd.d ${reg1}, ${reg2}, ${reg3}"),
+            "fadd.d");
+}
+
+TEST_F(AssemblerMIPS64Test, FsubW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FsubW, "fsub.w ${reg1}, ${reg2}, ${reg3}"),
+            "fsub.w");
+}
+
+TEST_F(AssemblerMIPS64Test, FsubD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FsubD, "fsub.d ${reg1}, ${reg2}, ${reg3}"),
+            "fsub.d");
+}
+
+TEST_F(AssemblerMIPS64Test, FmulW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmulW, "fmul.w ${reg1}, ${reg2}, ${reg3}"),
+            "fmul.w");
+}
+
+TEST_F(AssemblerMIPS64Test, FmulD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmulD, "fmul.d ${reg1}, ${reg2}, ${reg3}"),
+            "fmul.d");
+}
+
+TEST_F(AssemblerMIPS64Test, FdivW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FdivW, "fdiv.w ${reg1}, ${reg2}, ${reg3}"),
+            "fdiv.w");
+}
+
+TEST_F(AssemblerMIPS64Test, FdivD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FdivD, "fdiv.d ${reg1}, ${reg2}, ${reg3}"),
+            "fdiv.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Ffint_sW) {
+  DriverStr(RepeatVV(&mips64::Mips64Assembler::Ffint_sW, "ffint_s.w ${reg1}, ${reg2}"),
+            "ffint_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Ffint_sD) {
+  DriverStr(RepeatVV(&mips64::Mips64Assembler::Ffint_sD, "ffint_s.d ${reg1}, ${reg2}"),
+            "ffint_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Ftint_sW) {
+  DriverStr(RepeatVV(&mips64::Mips64Assembler::Ftint_sW, "ftint_s.w ${reg1}, ${reg2}"),
+            "ftint_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Ftint_sD) {
+  DriverStr(RepeatVV(&mips64::Mips64Assembler::Ftint_sD, "ftint_s.d ${reg1}, ${reg2}"),
+            "ftint_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, SllB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllB, "sll.b ${reg1}, ${reg2}, ${reg3}"), "sll.b");
+}
+
+TEST_F(AssemblerMIPS64Test, SllH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllH, "sll.h ${reg1}, ${reg2}, ${reg3}"), "sll.h");
+}
+
+TEST_F(AssemblerMIPS64Test, SllW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllW, "sll.w ${reg1}, ${reg2}, ${reg3}"), "sll.w");
+}
+
+TEST_F(AssemblerMIPS64Test, SllD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllD, "sll.d ${reg1}, ${reg2}, ${reg3}"), "sll.d");
+}
+
+TEST_F(AssemblerMIPS64Test, SraB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraB, "sra.b ${reg1}, ${reg2}, ${reg3}"), "sra.b");
+}
+
+TEST_F(AssemblerMIPS64Test, SraH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraH, "sra.h ${reg1}, ${reg2}, ${reg3}"), "sra.h");
+}
+
+TEST_F(AssemblerMIPS64Test, SraW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraW, "sra.w ${reg1}, ${reg2}, ${reg3}"), "sra.w");
+}
+
+TEST_F(AssemblerMIPS64Test, SraD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraD, "sra.d ${reg1}, ${reg2}, ${reg3}"), "sra.d");
+}
+
+TEST_F(AssemblerMIPS64Test, SrlB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlB, "srl.b ${reg1}, ${reg2}, ${reg3}"), "srl.b");
+}
+
+TEST_F(AssemblerMIPS64Test, SrlH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlH, "srl.h ${reg1}, ${reg2}, ${reg3}"), "srl.h");
+}
+
+TEST_F(AssemblerMIPS64Test, SrlW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlW, "srl.w ${reg1}, ${reg2}, ${reg3}"), "srl.w");
+}
+
+TEST_F(AssemblerMIPS64Test, SrlD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlD, "srl.d ${reg1}, ${reg2}, ${reg3}"), "srl.d");
+}
+
+TEST_F(AssemblerMIPS64Test, SlliB) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliB, 3, "slli.b ${reg1}, ${reg2}, {imm}"),
+            "slli.b");
+}
+
+TEST_F(AssemblerMIPS64Test, SlliH) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliH, 4, "slli.h ${reg1}, ${reg2}, {imm}"),
+            "slli.h");
+}
+
+TEST_F(AssemblerMIPS64Test, SlliW) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliW, 5, "slli.w ${reg1}, ${reg2}, {imm}"),
+            "slli.w");
+}
+
+TEST_F(AssemblerMIPS64Test, SlliD) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliD, 6, "slli.d ${reg1}, ${reg2}, {imm}"),
+            "slli.d");
+}
+
+TEST_F(AssemblerMIPS64Test, MoveV) {
+  DriverStr(RepeatVV(&mips64::Mips64Assembler::MoveV, "move.v ${reg1}, ${reg2}"), "move.v");
+}
+
+TEST_F(AssemblerMIPS64Test, SplatiB) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiB, 4, "splati.b ${reg1}, ${reg2}[{imm}]"),
+            "splati.b");
+}
+
+TEST_F(AssemblerMIPS64Test, SplatiH) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiH, 3, "splati.h ${reg1}, ${reg2}[{imm}]"),
"splati.h ${reg1}, ${reg2}[{imm}]"), + "splati.h"); +} + +TEST_F(AssemblerMIPS64Test, SplatiW) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiW, 2, "splati.w ${reg1}, ${reg2}[{imm}]"), + "splati.w"); +} + +TEST_F(AssemblerMIPS64Test, SplatiD) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiD, 1, "splati.d ${reg1}, ${reg2}[{imm}]"), + "splati.d"); +} + +TEST_F(AssemblerMIPS64Test, FillB) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillB, "fill.b ${reg1}, ${reg2}"), "fill.b"); +} + +TEST_F(AssemblerMIPS64Test, FillH) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillH, "fill.h ${reg1}, ${reg2}"), "fill.h"); +} + +TEST_F(AssemblerMIPS64Test, FillW) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillW, "fill.w ${reg1}, ${reg2}"), "fill.w"); +} + +TEST_F(AssemblerMIPS64Test, FillD) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillD, "fill.d ${reg1}, ${reg2}"), "fill.d"); +} + +TEST_F(AssemblerMIPS64Test, LdiB) { + DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b"); +} + +TEST_F(AssemblerMIPS64Test, LdiH) { + DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiH, -10, "ldi.h ${reg}, {imm}"), "ldi.h"); +} + +TEST_F(AssemblerMIPS64Test, LdiW) { + DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiW, -10, "ldi.w ${reg}, {imm}"), "ldi.w"); +} + +TEST_F(AssemblerMIPS64Test, LdiD) { + DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiD, -10, "ldi.d ${reg}, {imm}"), "ldi.d"); +} + +TEST_F(AssemblerMIPS64Test, LdB) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdB, -10, "ld.b ${reg1}, {imm}(${reg2})"), "ld.b"); +} + +TEST_F(AssemblerMIPS64Test, LdH) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdH, -10, "ld.h ${reg1}, {imm}(${reg2})", 0, 2), + "ld.h"); +} + +TEST_F(AssemblerMIPS64Test, LdW) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdW, -10, "ld.w ${reg1}, {imm}(${reg2})", 0, 4), + "ld.w"); +} + +TEST_F(AssemblerMIPS64Test, LdD) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdD, -10, "ld.d ${reg1}, {imm}(${reg2})", 0, 8), + "ld.d"); +} + +TEST_F(AssemblerMIPS64Test, StB) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StB, -10, "st.b ${reg1}, {imm}(${reg2})"), "st.b"); +} + +TEST_F(AssemblerMIPS64Test, StH) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StH, -10, "st.h ${reg1}, {imm}(${reg2})", 0, 2), + "st.h"); +} + +TEST_F(AssemblerMIPS64Test, StW) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StW, -10, "st.w ${reg1}, {imm}(${reg2})", 0, 4), + "st.w"); +} + +TEST_F(AssemblerMIPS64Test, StD) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StD, -10, "st.d ${reg1}, {imm}(${reg2})", 0, 8), + "st.d"); +} + #undef __ } // namespace art diff --git a/compiler/utils/mips64/constants_mips64.h b/compiler/utils/mips64/constants_mips64.h index f57498d34f..bc8e40b437 100644 --- a/compiler/utils/mips64/constants_mips64.h +++ b/compiler/utils/mips64/constants_mips64.h @@ -51,8 +51,36 @@ enum InstructionFields { kFdShift = 6, kFdBits = 5, + kMsaOperationShift = 23, + kMsaELMOperationShift = 22, + kMsa2ROperationShift = 18, + kMsa2RFOperationShift = 17, + kDfShift = 21, + kDfMShift = 16, + kDf2RShift = 16, + kDfNShift = 16, + kWtShift = 16, + kWtBits = 5, + kWsShift = 11, + kWsBits = 5, + kWdShift = 6, + kWdBits = 5, + kS10Shift = 16, + kI10Shift = 11, + kS10MinorShift = 2, + kBranchOffsetMask = 0x0000ffff, kJumpOffsetMask = 0x03ffffff, + kMsaMajorOpcode = 0x1e, + kMsaDfMByteMask = 0x70, + kMsaDfMHalfwordMask = 0x60, + kMsaDfMWordMask = 0x40, + kMsaDfMDoublewordMask = 0x00, + kMsaDfNByteMask = 0x00, + 
+  kMsaDfNHalfwordMask = 0x20,
+  kMsaDfNWordMask = 0x30,
+  kMsaDfNDoublewordMask = 0x38,
+  kMsaS10Mask = 0x3ff,
};

enum ScaleFactor {
diff --git a/compiler/utils/mips64/managed_register_mips64.cc b/compiler/utils/mips64/managed_register_mips64.cc
index dea396e4a7..42d061ec15 100644
--- a/compiler/utils/mips64/managed_register_mips64.cc
+++ b/compiler/utils/mips64/managed_register_mips64.cc
@@ -26,6 +26,11 @@ bool Mips64ManagedRegister::Overlaps(const Mips64ManagedRegister& other) const {
  CHECK(IsValidManagedRegister());
  CHECK(other.IsValidManagedRegister());
  if (Equals(other)) return true;
+  if (IsFpuRegister() && other.IsVectorRegister()) {
+    return (AsFpuRegister() == other.AsOverlappingFpuRegister());
+  } else if (IsVectorRegister() && other.IsFpuRegister()) {
+    return (AsVectorRegister() == other.AsOverlappingVectorRegister());
+  }
  return false;
}

@@ -36,6 +41,8 @@ void Mips64ManagedRegister::Print(std::ostream& os) const {
    os << "GPU: " << static_cast<int>(AsGpuRegister());
  } else if (IsFpuRegister()) {
    os << "FpuRegister: " << static_cast<int>(AsFpuRegister());
+  } else if (IsVectorRegister()) {
+    os << "VectorRegister: " << static_cast<int>(AsVectorRegister());
  } else {
    os << "??: " << RegId();
  }
diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h
index c9f95569cf..3980199b1e 100644
--- a/compiler/utils/mips64/managed_register_mips64.h
+++ b/compiler/utils/mips64/managed_register_mips64.h
@@ -30,11 +30,27 @@ const int kNumberOfGpuAllocIds = kNumberOfGpuRegisters;
const int kNumberOfFpuRegIds = kNumberOfFpuRegisters;
const int kNumberOfFpuAllocIds = kNumberOfFpuRegisters;

-const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds;
-const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds;
-
-// An instance of class 'ManagedRegister' represents a single GPU register (enum
-// Register) or a double precision FP register (enum FpuRegister)
+const int kNumberOfVecRegIds = kNumberOfVectorRegisters;
+const int kNumberOfVecAllocIds = kNumberOfVectorRegisters;
+
+const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds + kNumberOfVecRegIds;
+const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds + kNumberOfVecAllocIds;
+
+// Register ids map:
+//   [0..R[  core registers (enum GpuRegister)
+//   [R..F[  floating-point registers (enum FpuRegister)
+//   [F..W[  MSA vector registers (enum VectorRegister)
+// where
+//   R = kNumberOfGpuRegIds
+//   F = R + kNumberOfFpuRegIds
+//   W = F + kNumberOfVecRegIds
+
+// An instance of class 'ManagedRegister' represents a single Mips64 register.
+// A register can be one of the following:
+//  * core register (enum GpuRegister)
+//  * floating-point register (enum FpuRegister)
+//  * MSA vector register (enum VectorRegister)
+//
// 'ManagedRegister::NoRegister()' provides an invalid register.
// There is a one-to-one mapping between ManagedRegister and register id.
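[Editorial sketch.] To make the id map above concrete, here is a hypothetical worked example, not part of the commit, assuming the usual 32 ids per bank (kNumberOfGpuRegIds == kNumberOfFpuRegIds == 32). FromVectorRegister(W5) stores id 5 + 32 + 32 = 69; AsVectorRegister() subtracts both bank sizes back out; and the overlap accessors reinterpret W5 as the same physical slot as F5, which is what the new Overlaps() logic above relies on.

// Illustrative only; mirrors the accessors declared in the class below.
Mips64ManagedRegister w5 = Mips64ManagedRegister::FromVectorRegister(W5);
// Internally: id_ = 5 + kNumberOfGpuRegIds + kNumberOfFpuRegIds = 69.
CHECK_EQ(w5.AsVectorRegister(), W5);          // 69 - (32 + 32) = 5 -> W5.
CHECK_EQ(w5.AsOverlappingFpuRegister(), F5);  // Same physical register-file slot.
CHECK(w5.Overlaps(Mips64ManagedRegister::FromFpuRegister(F5)));
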
class Mips64ManagedRegister : public ManagedRegister {
@@ -49,6 +65,21 @@ class Mips64ManagedRegister : public ManagedRegister {
    return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds);
  }

+  constexpr VectorRegister AsVectorRegister() const {
+    CHECK(IsVectorRegister());
+    return static_cast<VectorRegister>(id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegisters));
+  }
+
+  constexpr FpuRegister AsOverlappingFpuRegister() const {
+    CHECK(IsValidManagedRegister());
+    return static_cast<FpuRegister>(AsVectorRegister());
+  }
+
+  constexpr VectorRegister AsOverlappingVectorRegister() const {
+    CHECK(IsValidManagedRegister());
+    return static_cast<VectorRegister>(AsFpuRegister());
+  }
+
  constexpr bool IsGpuRegister() const {
    CHECK(IsValidManagedRegister());
    return (0 <= id_) && (id_ < kNumberOfGpuRegIds);
@@ -60,6 +91,12 @@ class Mips64ManagedRegister : public ManagedRegister {
    return (0 <= test) && (test < kNumberOfFpuRegIds);
  }

+  constexpr bool IsVectorRegister() const {
+    CHECK(IsValidManagedRegister());
+    const int test = id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegIds);
+    return (0 <= test) && (test < kNumberOfVecRegIds);
+  }
+
  void Print(std::ostream& os) const;

  // Returns true if the two managed-registers ('this' and 'other') overlap.
@@ -77,6 +114,11 @@ class Mips64ManagedRegister : public ManagedRegister {
    return FromRegId(r + kNumberOfGpuRegIds);
  }

+  static constexpr Mips64ManagedRegister FromVectorRegister(VectorRegister r) {
+    CHECK_NE(r, kNoVectorRegister);
+    return FromRegId(r + kNumberOfGpuRegIds + kNumberOfFpuRegIds);
+  }
+
 private:
  constexpr bool IsValidManagedRegister() const {
    return (0 <= id_) && (id_ < kNumberOfRegIds);
diff --git a/compiler/utils/mips64/managed_register_mips64_test.cc b/compiler/utils/mips64/managed_register_mips64_test.cc
new file mode 100644
index 0000000000..8b72d7e61d
--- /dev/null
+++ b/compiler/utils/mips64/managed_register_mips64_test.cc
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "managed_register_mips64.h"
+#include "globals.h"
+#include "gtest/gtest.h"
+
+namespace art {
+namespace mips64 {
+
+TEST(Mips64ManagedRegister, NoRegister) {
+  Mips64ManagedRegister reg = ManagedRegister::NoRegister().AsMips64();
+  EXPECT_TRUE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.Overlaps(reg));
+}
+
+TEST(Mips64ManagedRegister, GpuRegister) {
+  Mips64ManagedRegister reg = Mips64ManagedRegister::FromGpuRegister(ZERO);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(ZERO, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(AT);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(AT, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(V0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(V0, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(A0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(A0, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(A7);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(A7, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(T0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(T0, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(T3);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(T3, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(S0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(S0, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(GP);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(GP, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(SP);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(SP, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(RA);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(RA, reg.AsGpuRegister());
+}
+
+TEST(Mips64ManagedRegister, FpuRegister) {
+  Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0);
+  Mips64ManagedRegister vreg = Mips64ManagedRegister::FromVectorRegister(W0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_TRUE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(vreg));
+  EXPECT_EQ(F0, reg.AsFpuRegister());
+  EXPECT_EQ(W0, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F1);
+  vreg = Mips64ManagedRegister::FromVectorRegister(W1);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_TRUE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(vreg));
+  EXPECT_EQ(F1, reg.AsFpuRegister());
+  EXPECT_EQ(W1, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F1)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F20);
+  vreg = Mips64ManagedRegister::FromVectorRegister(W20);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_TRUE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(vreg));
+  EXPECT_EQ(F20, reg.AsFpuRegister());
+  EXPECT_EQ(W20, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F20)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F31);
+  vreg = Mips64ManagedRegister::FromVectorRegister(W31);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_TRUE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(vreg));
+  EXPECT_EQ(F31, reg.AsFpuRegister());
+  EXPECT_EQ(W31, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F31)));
+}
+
+TEST(Mips64ManagedRegister, VectorRegister) {
+  Mips64ManagedRegister reg = Mips64ManagedRegister::FromVectorRegister(W0);
+  Mips64ManagedRegister freg = Mips64ManagedRegister::FromFpuRegister(F0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_TRUE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(freg));
+  EXPECT_EQ(W0, reg.AsVectorRegister());
+  EXPECT_EQ(F0, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W2);
+  freg = Mips64ManagedRegister::FromFpuRegister(F2);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_TRUE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(freg));
+  EXPECT_EQ(W2, reg.AsVectorRegister());
+  EXPECT_EQ(F2, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W2)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W13);
+  freg = Mips64ManagedRegister::FromFpuRegister(F13);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_TRUE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(freg));
+  EXPECT_EQ(W13, reg.AsVectorRegister());
+  EXPECT_EQ(F13, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W13)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W29);
+  freg = Mips64ManagedRegister::FromFpuRegister(F29);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_TRUE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(freg));
+  EXPECT_EQ(W29, reg.AsVectorRegister());
+  EXPECT_EQ(F29, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W29)));
+}
+
+TEST(Mips64ManagedRegister, Equals) {
+  ManagedRegister no_reg = ManagedRegister::NoRegister();
+  EXPECT_TRUE(no_reg.Equals(Mips64ManagedRegister::NoRegister()));
EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_ZERO = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_TRUE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_A1 = Mips64ManagedRegister::FromGpuRegister(A1); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_TRUE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_S2 = Mips64ManagedRegister::FromGpuRegister(S2); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_TRUE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_F0 = Mips64ManagedRegister::FromFpuRegister(F0); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_TRUE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_F31 = Mips64ManagedRegister::FromFpuRegister(F31); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + EXPECT_TRUE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_W0 = 
Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W31))); + + Mips64ManagedRegister reg_W31 = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W1))); + EXPECT_TRUE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W31))); +} + +TEST(Mips64ManagedRegister, Overlaps) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0); + Mips64ManagedRegister reg_o = Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F0, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W0, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F4); + reg_o = Mips64ManagedRegister::FromVectorRegister(W4); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F4, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W4, reg.AsOverlappingVectorRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + 
EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F16); + reg_o = Mips64ManagedRegister::FromVectorRegister(W16); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F16, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W16, reg.AsOverlappingVectorRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F31); + reg_o = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F31, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W31, reg.AsOverlappingVectorRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W0); + reg_o = Mips64ManagedRegister::FromFpuRegister(F0); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W0, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F0, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W4); + reg_o = Mips64ManagedRegister::FromFpuRegister(F4); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W4, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F4, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W16); + reg_o = Mips64ManagedRegister::FromFpuRegister(F16); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W16, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F16, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W31); + reg_o = Mips64ManagedRegister::FromFpuRegister(F31); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W31, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F31, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + 
EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(A0); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(S0); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(RA); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); +} + +} // namespace mips64 +} // namespace art diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 6eab302dab..1736618363 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -958,6 +958,14 @@ void X86Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) { } +void X86Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x5B); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF3); @@ -1161,6 +1169,32 @@ void X86Assembler::pand(XmmRegister dst, XmmRegister src) { } +void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x55); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::andnps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x55); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pandn(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDF); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::orpd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -1187,6 +1221,98 @@ void X86Assembler::por(XmmRegister dst, XmmRegister src) { } +void X86Assembler::pavgb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xE0); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pavgw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xE3); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x74); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x75); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x76); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x29); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x64); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpgtw(XmmRegister 
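The mips64 tests above pin down a single aliasing rule: FPU register F<n> and MSA vector register W<n> name the same physical register, so they overlap exactly when their indices match, while GPU registers overlap only themselves (and NoRegister overlaps nothing). A minimal standalone sketch of that rule, with illustrative types and names rather than ART's Mips64ManagedRegister implementation:

// Sketch only -- models the overlap rule the tests above encode, not ART's code.
#include <cassert>

enum Kind { kNone, kGpu, kFpu, kVector };

struct Reg {
  Kind kind;
  int index;
  bool Overlaps(const Reg& other) const {
    if (kind == kNone || other.kind == kNone) return false;  // NoRegister never overlaps.
    bool this_fp = (kind == kFpu || kind == kVector);
    bool other_fp = (other.kind == kFpu || other.kind == kVector);
    if (this_fp && other_fp) return index == other.index;  // F<n> aliases W<n>.
    return kind == other.kind && index == other.index;     // GPU: self-overlap only.
  }
};

int main() {
  assert((Reg{kFpu, 4}.Overlaps(Reg{kVector, 4})));   // F4 overlaps W4.
  assert(!(Reg{kFpu, 4}.Overlaps(Reg{kVector, 5})));  // F4 does not overlap W5.
  assert(!(Reg{kGpu, 4}.Overlaps(Reg{kFpu, 4})));     // GPU never aliases FPU.
  return 0;
}

The long Overlaps and Equals expectation lists in the diff are essentially this predicate spelled out register by register.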
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 6eab302dab..1736618363 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -958,6 +958,14 @@ void X86Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
 }


+void X86Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x5B);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
 void X86Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
@@ -1161,6 +1169,32 @@ void X86Assembler::pand(XmmRegister dst, XmmRegister src) {
 }


+void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x55);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::andnps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x55);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pandn(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xDF);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
 void X86Assembler::orpd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -1187,6 +1221,98 @@ void X86Assembler::por(XmmRegister dst, XmmRegister src) {
 }


+void X86Assembler::pavgb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xE0);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pavgw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xE3);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x74);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x75);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x76);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x29);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x64);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x65);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x66);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x37);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
 void X86Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 2999599fc5..a747cda7bd 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -457,6 +457,7 @@ class X86Assembler FINAL : public Assembler {
   void cvttss2si(Register dst, XmmRegister src);
   void cvttsd2si(Register dst, XmmRegister src);

+  void cvtdq2ps(XmmRegister dst, XmmRegister src);
   void cvtdq2pd(XmmRegister dst, XmmRegister src);

   void comiss(XmmRegister a, XmmRegister b);
@@ -486,10 +487,27 @@ class X86Assembler FINAL : public Assembler {
   void andps(XmmRegister dst, const Address& src);
   void pand(XmmRegister dst, XmmRegister src);  // no addr variant (for now)

+  void andnpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void andnps(XmmRegister dst, XmmRegister src);
+  void pandn(XmmRegister dst, XmmRegister src);
+
   void orpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
   void orps(XmmRegister dst, XmmRegister src);
   void por(XmmRegister dst, XmmRegister src);

+  void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void pavgw(XmmRegister dst, XmmRegister src);
+
+  void pcmpeqb(XmmRegister dst, XmmRegister src);
+  void pcmpeqw(XmmRegister dst, XmmRegister src);
+  void pcmpeqd(XmmRegister dst, XmmRegister src);
+  void pcmpeqq(XmmRegister dst, XmmRegister src);
+
+  void pcmpgtb(XmmRegister dst, XmmRegister src);
+  void pcmpgtw(XmmRegister dst, XmmRegister src);
+  void pcmpgtd(XmmRegister dst, XmmRegister src);
+  void pcmpgtq(XmmRegister dst, XmmRegister src);  // SSE4.2
+
   void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
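For reference, the register-register forms added above all follow the same shape: optional prefix bytes, one or two opcode bytes, then a ModRM byte, where EmitXmmRegisterOperand produces ModRM = 0xC0 | (dst << 3) | src for two XMM operands. A small standalone sketch of the resulting byte sequences (illustrative helper names, not the assembler's API):

// Sketch only -- reproduces the byte patterns the new emitters write.
#include <cstdint>
#include <cstdio>
#include <vector>

// pcmpeqb xmm<dst>, xmm<src>: 66 0F 74 /r (SSE2).
std::vector<uint8_t> EncodePcmpeqb(int dst, int src) {
  return {0x66, 0x0F, 0x74, static_cast<uint8_t>(0xC0 | (dst << 3) | src)};
}

// pcmpgtq xmm<dst>, xmm<src>: 66 0F 38 37 /r (SSE4.2, three-byte opcode map).
std::vector<uint8_t> EncodePcmpgtq(int dst, int src) {
  return {0x66, 0x0F, 0x38, 0x37, static_cast<uint8_t>(0xC0 | (dst << 3) | src)};
}

int main() {
  for (uint8_t b : EncodePcmpeqb(1, 2)) std::printf("%02X ", b);  // 66 0F 74 CA
  std::printf("\n");
  return 0;
}

Running it prints 66 0F 74 CA, which a disassembler renders as pcmpeqb %xmm2, %xmm1 in the AT&T operand order the tests below use.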
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index a74bea207e..f75f972265 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -322,6 +322,14 @@ TEST_F(AssemblerX86Test, RollImm) {
   DriverStr(RepeatRI(&x86::X86Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli");
 }

+TEST_F(AssemblerX86Test, Cvtdq2ps) {
+  DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2ps, "cvtdq2ps %{reg2}, %{reg1}"), "cvtdq2ps");
+}
+
+TEST_F(AssemblerX86Test, Cvtdq2pd) {
+  DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd");
+}
+
 TEST_F(AssemblerX86Test, ComissAddr) {
   GetAssembler()->comiss(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
   const char* expected = "comiss 0(%EAX), %xmm0\n";
@@ -573,6 +581,18 @@ TEST_F(AssemblerX86Test, PAnd) {
   DriverStr(RepeatFF(&x86::X86Assembler::pand, "pand %{reg2}, %{reg1}"), "pand");
 }

+TEST_F(AssemblerX86Test, AndnPD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd");
+}
+
+TEST_F(AssemblerX86Test, AndnPS) {
+  DriverStr(RepeatFF(&x86::X86Assembler::andnps, "andnps %{reg2}, %{reg1}"), "andnps");
+}
+
+TEST_F(AssemblerX86Test, PAndn) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pandn, "pandn %{reg2}, %{reg1}"), "pandn");
+}
+
 TEST_F(AssemblerX86Test, OrPD) {
   DriverStr(RepeatFF(&x86::X86Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd");
 }
@@ -585,6 +605,46 @@ TEST_F(AssemblerX86Test, POr) {
   DriverStr(RepeatFF(&x86::X86Assembler::por, "por %{reg2}, %{reg1}"), "por");
 }

+TEST_F(AssemblerX86Test, PAvgB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pavgb, "pavgb %{reg2}, %{reg1}"), "pavgb");
+}
+
+TEST_F(AssemblerX86Test, PAvgW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqw, "pcmpeqw %{reg2}, %{reg1}"), "cmpeqw");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqd, "pcmpeqd %{reg2}, %{reg1}"), "cmpeqd");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqQ) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "cmpeqq");
+}
+
+TEST_F(AssemblerX86Test, PCmpgtB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtb, "pcmpgtb %{reg2}, %{reg1}"), "cmpgtb");
+}
+
+TEST_F(AssemblerX86Test, PCmpgtW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtw, "pcmpgtw %{reg2}, %{reg1}"), "cmpgtw");
+}
+
+TEST_F(AssemblerX86Test, PCmpgtD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtd, "pcmpgtd %{reg2}, %{reg1}"), "cmpgtd");
+}
+
+TEST_F(AssemblerX86Test, PCmpgtQ) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtq, "pcmpgtq %{reg2}, %{reg1}"), "cmpgtq");
+}
+
 TEST_F(AssemblerX86Test, ShufPS) {
   DriverStr(RepeatFFI(&x86::X86Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps");
 }
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 458204aca9..1b7a4850db 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1153,6 +1153,15 @@ void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
 }


+void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x5B);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
 void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
@@ -1366,6 +1375,32 @@ void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
   EmitXmmRegisterOperand(dst.LowBits(), src);
 }

+void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x55);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x55);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xDF);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
 void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -1392,6 +1427,98 @@ void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
   EmitXmmRegisterOperand(dst.LowBits(), src);
 }

+void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xE0);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xE3);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x74);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x75);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x76);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x29);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x64);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x65);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x66);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x37);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
 void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
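The x86-64 variants differ from the x86 ones only in the EmitOptionalRex32 call and the LowBits() masking: XMM8 through XMM15 need a REX prefix carrying the high register bits (REX.R for the destination, REX.B for the source), and only the low three bits of each register number go into the ModRM byte. A sketch of the optional-prefix rule, under the assumption that a REX byte is emitted only when some bit beyond the fixed 0100 pattern is set (illustrative, not ART's EmitOptionalRex32):

// Sketch only -- models when an optional REX prefix byte is needed.
#include <cstdint>
#include <cstdio>

// Returns the REX byte for a reg-reg XMM form, or 0 if no prefix is needed.
uint8_t OptionalRex(int dst, int src) {
  uint8_t rex = 0x40;                 // Fixed 0100 pattern (REX.W clear).
  if (dst >= 8) rex |= 0x04;          // REX.R extends the ModRM reg field.
  if (src >= 8) rex |= 0x01;          // REX.B extends the ModRM r/m field.
  return (rex == 0x40) ? 0 : rex;     // 0 means the prefix is omitted.
}

int main() {
  std::printf("%02X\n", OptionalRex(9, 2));  // 44: xmm9 needs REX.R.
  std::printf("%02X\n", OptionalRex(1, 2));  // 00: xmm0-xmm7 need no prefix.
  return 0;
}

For registers below XMM8 no byte is emitted, which is why the 32-bit and 64-bit encodings of these instructions coincide there.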
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 0dc11d840b..0ddc46ca44 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -486,6 +486,7 @@ class X86_64Assembler FINAL : public Assembler {
   void cvttsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
   void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);

+  void cvtdq2ps(XmmRegister dst, XmmRegister src);
   void cvtdq2pd(XmmRegister dst, XmmRegister src);

   void comiss(XmmRegister a, XmmRegister b);
@@ -514,10 +515,27 @@ class X86_64Assembler FINAL : public Assembler {
   void andps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
   void pand(XmmRegister dst, XmmRegister src);

+  void andnpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void andnps(XmmRegister dst, XmmRegister src);
+  void pandn(XmmRegister dst, XmmRegister src);
+
   void orpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
   void orps(XmmRegister dst, XmmRegister src);
   void por(XmmRegister dst, XmmRegister src);

+  void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void pavgw(XmmRegister dst, XmmRegister src);
+
+  void pcmpeqb(XmmRegister dst, XmmRegister src);
+  void pcmpeqw(XmmRegister dst, XmmRegister src);
+  void pcmpeqd(XmmRegister dst, XmmRegister src);
+  void pcmpeqq(XmmRegister dst, XmmRegister src);
+
+  void pcmpgtb(XmmRegister dst, XmmRegister src);
+  void pcmpgtw(XmmRegister dst, XmmRegister src);
+  void pcmpgtd(XmmRegister dst, XmmRegister src);
+  void pcmpgtq(XmmRegister dst, XmmRegister src);  // SSE4.2
+
   void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
"pcmpeqw"); +} + +TEST_F(AssemblerX86_64Test, PCmpeqd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqd, "pcmpeqd %{reg2}, %{reg1}"), "pcmpeqd"); +} + +TEST_F(AssemblerX86_64Test, PCmpeqq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "pcmpeqq"); +} + +TEST_F(AssemblerX86_64Test, PCmpgtb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtb, "pcmpgtb %{reg2}, %{reg1}"), "pcmpgtb"); +} + +TEST_F(AssemblerX86_64Test, PCmpgtw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtw, "pcmpgtw %{reg2}, %{reg1}"), "pcmpgtw"); +} + +TEST_F(AssemblerX86_64Test, PCmpgtd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtd, "pcmpgtd %{reg2}, %{reg1}"), "pcmpgtd"); +} + +TEST_F(AssemblerX86_64Test, PCmpgtq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtq, "pcmpgtq %{reg2}, %{reg1}"), "pcmpgtq"); +} + TEST_F(AssemblerX86_64Test, Shufps) { DriverStr(RepeatFFI(&x86_64::X86_64Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps"); } |