diff options
Diffstat (limited to 'compiler/utils')
| -rw-r--r-- | compiler/utils/arm/assembler_arm_vixl.cc | 26 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_arm_vixl.h | 23 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_thumb2_test.cc | 5 | ||||
| -rw-r--r-- | compiler/utils/arm/jni_macro_assembler_arm_vixl.cc | 31 | ||||
| -rw-r--r-- | compiler/utils/assembler_test.h | 111 | ||||
| -rw-r--r-- | compiler/utils/assembler_test_base.h | 12 | ||||
| -rw-r--r-- | compiler/utils/assembler_thumb_test.cc | 5 | ||||
| -rw-r--r-- | compiler/utils/assembler_thumb_test_expected.cc.inc | 242 | ||||
| -rw-r--r-- | compiler/utils/dedupe_set-inl.h | 17 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64.cc | 311 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64.h | 128 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64_test.cc | 346 |
12 files changed, 1030 insertions, 227 deletions
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc index 1614d04a95..e5eef37b7b 100644 --- a/compiler/utils/arm/assembler_arm_vixl.cc +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -23,6 +23,9 @@ using namespace vixl::aarch32; // NOLINT(build/namespaces) +using vixl::ExactAssemblyScope; +using vixl::CodeBufferCheckScope; + namespace art { namespace arm { @@ -441,7 +444,7 @@ void ArmVIXLMacroAssembler::CompareAndBranchIfZero(vixl32::Register rn, return; } Cmp(rn, 0); - B(eq, label); + B(eq, label, is_far_target); } void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn, @@ -452,16 +455,16 @@ void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn, return; } Cmp(rn, 0); - B(ne, label); + B(ne, label, is_far_target); } void ArmVIXLMacroAssembler::B(vixl32::Label* label) { if (!label->IsBound()) { // Try to use 16-bit T2 encoding of B instruction. DCHECK(OutsideITBlock()); - AssemblerAccurateScope ass(this, - kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); + ExactAssemblyScope guard(this, + k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); b(al, Narrow, label); AddBranchLabel(label); return; @@ -469,13 +472,22 @@ void ArmVIXLMacroAssembler::B(vixl32::Label* label) { MacroAssembler::B(label); } -void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label) { +void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target) { + if (!label->IsBound() && !is_far_target) { + // Try to use 16-bit T2 encoding of B instruction. + DCHECK(OutsideITBlock()); + ExactAssemblyScope guard(this, + k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + b(cond, Narrow, label); + AddBranchLabel(label); + return; + } // To further reduce the Bcc encoding size and use 16-bit T1 encoding, // we can provide a hint to this function: i.e. far_target=false. 
// By default this function uses 'EncodingSizeType::Best' which generates 32-bit T3 encoding. MacroAssembler::B(cond, label); } - } // namespace arm } // namespace art diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index 17cf1064b0..322f6c4d70 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -39,6 +39,13 @@ namespace arm { class ArmVIXLMacroAssembler FINAL : public vixl32::MacroAssembler { public: + // Most methods fit in a 1KB code buffer, which results in more optimal alloc/realloc and + // fewer system calls than a larger default capacity. + static constexpr size_t kDefaultCodeBufferCapacity = 1 * KB; + + ArmVIXLMacroAssembler() + : vixl32::MacroAssembler(ArmVIXLMacroAssembler::kDefaultCodeBufferCapacity) {} + // The following interfaces can generate CMP+Bcc or Cbz/Cbnz. // CMP+Bcc are generated by default. // If a hint is given (is_far_target = false) and rn and label can all fit into Cbz/Cbnz, @@ -114,7 +121,7 @@ class ArmVIXLMacroAssembler FINAL : public vixl32::MacroAssembler { // TODO: Remove when MacroAssembler::Add(FlagsUpdate, Condition, Register, Register, Operand) // makes the right decision about 16-bit encodings. void Add(vixl32::Register rd, vixl32::Register rn, const vixl32::Operand& operand) { - if (rd.Is(rn)) { + if (rd.Is(rn) && operand.IsPlainRegister()) { MacroAssembler::Add(rd, rn, operand); } else { MacroAssembler::Add(vixl32::DontCare, rd, rn, operand); @@ -124,7 +131,10 @@ class ArmVIXLMacroAssembler FINAL : public vixl32::MacroAssembler { // These interfaces try to use 16-bit T2 encoding of B instruction. void B(vixl32::Label* label); - void B(vixl32::Condition cond, vixl32::Label* label); + // For B(label), we always try to use Narrow encoding, because 16-bit T2 encoding supports + // jumping within 2KB range. 
For B(cond, label), because the supported branch range is 256 + // bytes; we use the far_target hint to try to use 16-bit T1 encoding for short range jumps. + void B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target = true); }; class ArmVIXLAssembler FINAL : public Assembler { @@ -205,6 +215,15 @@ class ArmVIXLAssembler FINAL : public Assembler { int32_t value, vixl32::Condition cond = vixl32::al); + template <typename T> + vixl::aarch32::Literal<T>* CreateLiteralDestroyedWithPool(T value) { + vixl::aarch32::Literal<T>* literal = + new vixl::aarch32::Literal<T>(value, + vixl32::RawLiteral::kPlacedWhenUsed, + vixl32::RawLiteral::kDeletedOnPoolDestruction); + return literal; + } + private: // VIXL assembler. ArmVIXLMacroAssembler vixl_masm_; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 30e8f4e604..0147a76744 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -16,12 +16,15 @@ #include "assembler_thumb2.h" +#include "android-base/stringprintf.h" + #include "base/stl_util.h" -#include "base/stringprintf.h" #include "utils/assembler_test.h" namespace art { +using android::base::StringPrintf; + class AssemblerThumb2Test : public AssemblerTest<arm::Thumb2Assembler, arm::Register, arm::SRegister, uint32_t> { diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc index 2d026b83f9..d07c047253 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc @@ -24,6 +24,9 @@ using namespace vixl::aarch32; // NOLINT(build/namespaces) namespace vixl32 = vixl::aarch32; +using vixl::ExactAssemblyScope; +using vixl::CodeBufferCheckScope; + namespace art { namespace arm { @@ -455,16 +458,16 @@ void ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg, if (asm_.ShifterOperandCanHold(ADD, 
handle_scope_offset.Int32Value(), kCcDontCare)) { if (!out_reg.Equals(in_reg)) { - AssemblerAccurateScope guard(asm_.GetVIXLAssembler(), - 3 * vixl32::kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); + ExactAssemblyScope guard(asm_.GetVIXLAssembler(), + 3 * vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); ___ it(eq, 0xc); ___ mov(eq, out_reg.AsVIXLRegister(), 0); asm_.AddConstantInIt(out_reg.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne); } else { - AssemblerAccurateScope guard(asm_.GetVIXLAssembler(), - 2 * vixl32::kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); + ExactAssemblyScope guard(asm_.GetVIXLAssembler(), + 2 * vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); ___ it(ne, 0x8); asm_.AddConstantInIt(out_reg.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne); } @@ -493,9 +496,9 @@ void ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off, ___ Cmp(scratch.AsVIXLRegister(), 0); if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value(), kCcDontCare)) { - AssemblerAccurateScope guard(asm_.GetVIXLAssembler(), - 2 * vixl32::kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); + ExactAssemblyScope guard(asm_.GetVIXLAssembler(), + 2 * vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); ___ it(ne, 0x8); asm_.AddConstantInIt(scratch.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne); } else { @@ -586,10 +589,12 @@ void ArmVIXLJNIMacroAssembler::ExceptionPoll(ManagedRegister m_scratch, size_t s ___ Cmp(scratch.AsVIXLRegister(), 0); { - AssemblerAccurateScope guard(asm_.GetVIXLAssembler(), - vixl32::kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - ___ b(ne, Narrow, exception_blocks_.back()->Entry()); + ExactAssemblyScope guard(asm_.GetVIXLAssembler(), + vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + vixl32::Label* label = 
exception_blocks_.back()->Entry(); + ___ b(ne, Narrow, label); + ___ AddBranchLabel(label); } // TODO: think about using CBNZ here. } diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index b34e125866..5c4875951b 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -145,7 +145,8 @@ class AssemblerTest : public testing::Test { const std::vector<Reg2*> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), - const std::string& fmt) { + const std::string& fmt, + int bias = 0) { std::string str; std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); @@ -153,7 +154,7 @@ class AssemblerTest : public testing::Test { for (auto reg2 : reg2_registers) { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); - (assembler_.get()->*f)(*reg1, *reg2, new_imm); + (assembler_.get()->*f)(*reg1, *reg2, new_imm + bias); std::string base = fmt; std::string reg1_string = (this->*GetName1)(*reg1); @@ -171,7 +172,7 @@ class AssemblerTest : public testing::Test { size_t imm_index = base.find(IMM_TOKEN); if (imm_index != std::string::npos) { std::ostringstream sreg; - sreg << imm; + sreg << imm + bias; std::string imm_string = sreg.str(); base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); } @@ -188,6 +189,67 @@ class AssemblerTest : public testing::Test { return str; } + template <typename Reg1, typename Reg2, typename Reg3, typename ImmType> + std::string RepeatTemplatedRegistersImmBits(void (Ass::*f)(Reg1, Reg2, Reg3, ImmType), + int imm_bits, + const std::vector<Reg1*> reg1_registers, + const std::vector<Reg2*> reg2_registers, + const std::vector<Reg3*> reg3_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), + std::string (AssemblerTest::*GetName3)(const Reg3&), + std::string fmt, + int bias) { + std::string str; + std::vector<int64_t> 
imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); + + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (auto reg3 : reg3_registers) { + for (int64_t imm : imms) { + ImmType new_imm = CreateImmediate(imm); + (assembler_.get()->*f)(*reg1, *reg2, *reg3, new_imm + bias); + std::string base = fmt; + + std::string reg1_string = (this->*GetName1)(*reg1); + size_t reg1_index; + while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { + base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); + } + + std::string reg2_string = (this->*GetName2)(*reg2); + size_t reg2_index; + while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { + base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); + } + + std::string reg3_string = (this->*GetName3)(*reg3); + size_t reg3_index; + while ((reg3_index = base.find(REG3_TOKEN)) != std::string::npos) { + base.replace(reg3_index, ConstexprStrLen(REG3_TOKEN), reg3_string); + } + + size_t imm_index = base.find(IMM_TOKEN); + if (imm_index != std::string::npos) { + std::ostringstream sreg; + sreg << imm + bias; + std::string imm_string = sreg.str(); + base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + } + } + } + // Add a newline at the end. 
+ str += "\n"; + return str; + } + template <typename ImmType, typename Reg1, typename Reg2> std::string RepeatTemplatedImmBitsRegisters(void (Ass::*f)(ImmType, Reg1, Reg2), const std::vector<Reg1*> reg1_registers, @@ -245,14 +307,15 @@ class AssemblerTest : public testing::Test { int imm_bits, const std::vector<Reg*> registers, std::string (AssemblerTest::*GetName)(const RegType&), - const std::string& fmt) { + const std::string& fmt, + int bias) { std::string str; std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); for (auto reg : registers) { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); - (assembler_.get()->*f)(*reg, new_imm); + (assembler_.get()->*f)(*reg, new_imm + bias); std::string base = fmt; std::string reg_string = (this->*GetName)(*reg); @@ -264,7 +327,7 @@ class AssemblerTest : public testing::Test { size_t imm_index = base.find(IMM_TOKEN); if (imm_index != std::string::npos) { std::ostringstream sreg; - sreg << imm; + sreg << imm + bias; std::string imm_string = sreg.str(); base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); } @@ -281,36 +344,60 @@ class AssemblerTest : public testing::Test { } template <typename ImmType> - std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), int imm_bits, const std::string& fmt) { + std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), + int imm_bits, + const std::string& fmt, + int bias = 0) { return RepeatTemplatedRegistersImmBits<Reg, Reg, ImmType>(f, imm_bits, GetRegisters(), GetRegisters(), &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, - fmt); + fmt, + bias); + } + + template <typename ImmType> + std::string RepeatRRRIb(void (Ass::*f)(Reg, Reg, Reg, ImmType), + int imm_bits, + const std::string& fmt, + int bias = 0) { + return RepeatTemplatedRegistersImmBits<Reg, Reg, Reg, ImmType>(f, + imm_bits, + GetRegisters(), + GetRegisters(), + GetRegisters(), + 
&AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt, + bias); } template <typename ImmType> - std::string RepeatRIb(void (Ass::*f)(Reg, ImmType), int imm_bits, const std::string& fmt) { + std::string RepeatRIb(void (Ass::*f)(Reg, ImmType), int imm_bits, std::string fmt, int bias = 0) { return RepeatTemplatedRegisterImmBits<Reg, ImmType>(f, imm_bits, GetRegisters(), &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, - fmt); + fmt, + bias); } template <typename ImmType> std::string RepeatFRIb(void (Ass::*f)(FPReg, Reg, ImmType), int imm_bits, - const std::string& fmt) { + const std::string& fmt, + int bias = 0) { return RepeatTemplatedRegistersImmBits<FPReg, Reg, ImmType>(f, imm_bits, GetFPRegisters(), GetRegisters(), &AssemblerTest::GetFPRegName, &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, - fmt); + fmt, + bias); } std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), const std::string& fmt) { diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h index ac24ee95eb..e7edf96722 100644 --- a/compiler/utils/assembler_test_base.h +++ b/compiler/utils/assembler_test_base.h @@ -23,6 +23,8 @@ #include <iterator> #include <sys/stat.h> +#include "android-base/strings.h" + #include "common_runtime_test.h" // For ScratchFile #include "utils.h" @@ -221,7 +223,7 @@ class AssemblerTestInfrastructure { args.push_back("-o"); args.push_back(to_file); args.push_back(from_file); - std::string cmd = Join(args, ' '); + std::string cmd = android::base::Join(args, ' '); args.clear(); args.push_back("/bin/sh"); @@ -257,7 +259,7 @@ class AssemblerTestInfrastructure { args.push_back(file); args.push_back(">"); args.push_back(file+".dump"); - std::string cmd = Join(args, ' '); + std::string cmd = android::base::Join(args, ' '); args.clear(); args.push_back("/bin/sh"); @@ -338,7 +340,7 @@ class 
AssemblerTestInfrastructure { args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'"); args.push_back(">"); args.push_back(file+".dis"); - std::string cmd = Join(args, ' '); + std::string cmd = android::base::Join(args, ' '); args.clear(); args.push_back("/bin/sh"); @@ -500,7 +502,7 @@ class AssemblerTestInfrastructure { std::string tmp_file = GetTmpnam(); args.push_back(">"); args.push_back(tmp_file); - std::string sh_args = Join(args, ' '); + std::string sh_args = android::base::Join(args, ' '); args.clear(); args.push_back("/bin/sh"); @@ -541,7 +543,7 @@ class AssemblerTestInfrastructure { args.push_back("sort"); args.push_back(">"); args.push_back(tmp_file); - std::string sh_args = Join(args, ' '); + std::string sh_args = android::base::Join(args, ' '); args.clear(); args.push_back("/bin/sh"); diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 50a1d9fd98..4e9b619979 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -1717,6 +1717,11 @@ TEST_F(ArmVIXLAssemblerTest, VixlJniHelpers) { __ ExceptionPoll(scratch_register, 0); + // Push the target out of range of branch emitted by ExceptionPoll. 
+ for (int i = 0; i < 64; i++) { + __ Store(FrameOffset(2047), scratch_register, 4); + } + __ DecreaseFrameSize(4096); __ DecreaseFrameSize(32); __ RemoveFrame(frame_size, callee_save_regs); diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 69e1d8f6fa..ab4f9e944c 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -5458,94 +5458,160 @@ const char* const CmpConstantResults[] = { }; const char* const VixlJniHelpersResults[] = { - " 0: e92d 4de0 stmdb sp!, {r5, r6, r7, r8, sl, fp, lr}\n", - " 4: ed2d 8a10 vpush {s16-s31}\n", - " 8: b089 sub sp, #36 ; 0x24\n", - " a: 9000 str r0, [sp, #0]\n", - " c: 9121 str r1, [sp, #132] ; 0x84\n", - " e: ed8d 0a22 vstr s0, [sp, #136] ; 0x88\n", - " 12: 9223 str r2, [sp, #140] ; 0x8c\n", - " 14: 9324 str r3, [sp, #144] ; 0x90\n", - " 16: b088 sub sp, #32\n", - " 18: f5ad 5d80 sub.w sp, sp, #4096 ; 0x1000\n", - " 1c: 9808 ldr r0, [sp, #32]\n", - " 1e: 981f ldr r0, [sp, #124] ; 0x7c\n", - " 20: 9821 ldr r0, [sp, #132] ; 0x84\n", - " 22: 98ff ldr r0, [sp, #1020] ; 0x3fc\n", - " 24: f8dd 0400 ldr.w r0, [sp, #1024] ; 0x400\n", - " 28: f8dd cffc ldr.w ip, [sp, #4092] ; 0xffc\n", - " 2c: f50d 5c80 add.w ip, sp, #4096 ; 0x1000\n", - " 30: f8dc c000 ldr.w ip, [ip]\n", - " 34: f8d9 c200 ldr.w ip, [r9, #512] ; 0x200\n", - " 38: f8dc 0080 ldr.w r0, [ip, #128] ; 0x80\n", - " 3c: 9008 str r0, [sp, #32]\n", - " 3e: 901f str r0, [sp, #124] ; 0x7c\n", - " 40: 9021 str r0, [sp, #132] ; 0x84\n", - " 42: 90ff str r0, [sp, #1020] ; 0x3fc\n", - " 44: f8cd 0400 str.w r0, [sp, #1024] ; 0x400\n", - " 48: f8cd cffc str.w ip, [sp, #4092] ; 0xffc\n", - " 4c: f84d 5d04 str.w r5, [sp, #-4]!\n", - " 50: f50d 5580 add.w r5, sp, #4096 ; 0x1000\n", - " 54: f8c5 c004 str.w ip, [r5, #4]\n", - " 58: f85d 5b04 ldr.w r5, [sp], #4\n", - " 5c: f04f 0cff mov.w ip, #255 ; 0xff\n", - " 60: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", 
- " 64: f06f 4c7f mvn.w ip, #4278190080 ; 0xff000000\n", - " 68: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", - " 6c: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", - " 70: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", - " 74: 900c str r0, [sp, #48] ; 0x30\n", - " 76: f8dd c030 ldr.w ip, [sp, #48] ; 0x30\n", - " 7a: f8cd c034 str.w ip, [sp, #52] ; 0x34\n", - " 7e: f50d 5c80 add.w ip, sp, #4096 ; 0x1000\n", - " 82: f8c9 c200 str.w ip, [r9, #512] ; 0x200\n", - " 86: f8c9 d200 str.w sp, [r9, #512] ; 0x200\n", - " 8a: f8d0 c030 ldr.w ip, [r0, #48] ; 0x30\n", - " 8e: 47e0 blx ip\n", - " 90: f8dd c02c ldr.w ip, [sp, #44] ; 0x2c\n", - " 94: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", - " 98: f8d9 c200 ldr.w ip, [r9, #512] ; 0x200\n", - " 9c: f8cd c02c str.w ip, [sp, #44] ; 0x2c\n", - " a0: f8dd c02c ldr.w ip, [sp, #44] ; 0x2c\n", - " a4: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", - " a8: 4648 mov r0, r9\n", - " aa: f8cd 9030 str.w r9, [sp, #48] ; 0x30\n", - " ae: 4684 mov ip, r0\n", - " b0: f1bc 0f00 cmp.w ip, #0\n", - " b4: bf18 it ne\n", - " b6: f10d 0c30 addne.w ip, sp, #48 ; 0x30\n", - " ba: f10d 0c30 add.w ip, sp, #48 ; 0x30\n", - " be: f1bc 0f00 cmp.w ip, #0\n", - " c2: bf0c ite eq\n", - " c4: 2000 moveq r0, #0\n", - " c6: a80c addne r0, sp, #48 ; 0x30\n", - " c8: f8dd c040 ldr.w ip, [sp, #64] ; 0x40\n", - " cc: f1bc 0f00 cmp.w ip, #0\n", - " d0: bf18 it ne\n", - " d2: f10d 0c40 addne.w ip, sp, #64 ; 0x40\n", - " d6: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", - " da: f1bc 0f00 cmp.w ip, #0\n", - " de: bf0c ite eq\n", - " e0: 2000 moveq r0, #0\n", - " e2: 4668 movne r0, sp\n", - " e4: f1bc 0f00 cmp.w ip, #0\n", - " e8: bf0c ite eq\n", - " ea: 2000 moveq r0, #0\n", - " ec: f20d 4001 addwne r0, sp, #1025 ; 0x401\n", - " f0: f1bc 0f00 cmp.w ip, #0\n", - " f4: bf18 it ne\n", - " f6: f20d 4c01 addwne ip, sp, #1025 ; 0x401\n", - " fa: f8d9 c084 ldr.w ip, [r9, #132] ; 0x84\n", - " fe: f1bc 0f00 cmp.w ip, #0\n", - " 102: d107 bne.n 114 <VixlJniHelpers+0x114>\n", - " 104: f50d 5d80 add.w sp, sp, 
#4096 ; 0x1000\n", - " 108: b008 add sp, #32\n", - " 10a: b009 add sp, #36 ; 0x24\n", - " 10c: ecbd 8a10 vpop {s16-s31}\n", - " 110: e8bd 8de0 ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n", - " 114: 4660 mov r0, ip\n", - " 116: f8d9 c2b0 ldr.w ip, [r9, #688] ; 0x2b0\n", - " 11a: 47e0 blx ip\n", + " 0: e92d 4de0 stmdb sp!, {r5, r6, r7, r8, sl, fp, lr}\n", + " 4: ed2d 8a10 vpush {s16-s31}\n", + " 8: b089 sub sp, #36 ; 0x24\n", + " a: 9000 str r0, [sp, #0]\n", + " c: 9121 str r1, [sp, #132] ; 0x84\n", + " e: ed8d 0a22 vstr s0, [sp, #136] ; 0x88\n", + " 12: 9223 str r2, [sp, #140] ; 0x8c\n", + " 14: 9324 str r3, [sp, #144] ; 0x90\n", + " 16: b088 sub sp, #32\n", + " 18: f5ad 5d80 sub.w sp, sp, #4096 ; 0x1000\n", + " 1c: 9808 ldr r0, [sp, #32]\n", + " 1e: 981f ldr r0, [sp, #124] ; 0x7c\n", + " 20: 9821 ldr r0, [sp, #132] ; 0x84\n", + " 22: 98ff ldr r0, [sp, #1020] ; 0x3fc\n", + " 24: f8dd 0400 ldr.w r0, [sp, #1024] ; 0x400\n", + " 28: f8dd cffc ldr.w ip, [sp, #4092] ; 0xffc\n", + " 2c: f50d 5c80 add.w ip, sp, #4096 ; 0x1000\n", + " 30: f8dc c000 ldr.w ip, [ip]\n", + " 34: f8d9 c200 ldr.w ip, [r9, #512] ; 0x200\n", + " 38: f8dc 0080 ldr.w r0, [ip, #128] ; 0x80\n", + " 3c: 9008 str r0, [sp, #32]\n", + " 3e: 901f str r0, [sp, #124] ; 0x7c\n", + " 40: 9021 str r0, [sp, #132] ; 0x84\n", + " 42: 90ff str r0, [sp, #1020] ; 0x3fc\n", + " 44: f8cd 0400 str.w r0, [sp, #1024] ; 0x400\n", + " 48: f8cd cffc str.w ip, [sp, #4092] ; 0xffc\n", + " 4c: f84d 5d04 str.w r5, [sp, #-4]!\n", + " 50: f50d 5580 add.w r5, sp, #4096 ; 0x1000\n", + " 54: f8c5 c004 str.w ip, [r5, #4]\n", + " 58: f85d 5b04 ldr.w r5, [sp], #4\n", + " 5c: f04f 0cff mov.w ip, #255 ; 0xff\n", + " 60: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " 64: f06f 4c7f mvn.w ip, #4278190080 ; 0xff000000\n", + " 68: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " 6c: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " 70: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " 74: 900c str r0, [sp, #48] ; 0x30\n", + " 76: f8dd c030 ldr.w ip, [sp, #48] 
; 0x30\n", + " 7a: f8cd c034 str.w ip, [sp, #52] ; 0x34\n", + " 7e: f50d 5c80 add.w ip, sp, #4096 ; 0x1000\n", + " 82: f8c9 c200 str.w ip, [r9, #512] ; 0x200\n", + " 86: f8c9 d200 str.w sp, [r9, #512] ; 0x200\n", + " 8a: f8d0 c030 ldr.w ip, [r0, #48] ; 0x30\n", + " 8e: 47e0 blx ip\n", + " 90: f8dd c02c ldr.w ip, [sp, #44] ; 0x2c\n", + " 94: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " 98: f8d9 c200 ldr.w ip, [r9, #512] ; 0x200\n", + " 9c: f8cd c02c str.w ip, [sp, #44] ; 0x2c\n", + " a0: f8dd c02c ldr.w ip, [sp, #44] ; 0x2c\n", + " a4: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " a8: 4648 mov r0, r9\n", + " aa: f8cd 9030 str.w r9, [sp, #48] ; 0x30\n", + " ae: 4684 mov ip, r0\n", + " b0: f1bc 0f00 cmp.w ip, #0\n", + " b4: bf18 it ne\n", + " b6: f10d 0c30 addne.w ip, sp, #48 ; 0x30\n", + " ba: f10d 0c30 add.w ip, sp, #48 ; 0x30\n", + " be: f1bc 0f00 cmp.w ip, #0\n", + " c2: bf0c ite eq\n", + " c4: 2000 moveq r0, #0\n", + " c6: a80c addne r0, sp, #48 ; 0x30\n", + " c8: f8dd c040 ldr.w ip, [sp, #64] ; 0x40\n", + " cc: f1bc 0f00 cmp.w ip, #0\n", + " d0: bf18 it ne\n", + " d2: f10d 0c40 addne.w ip, sp, #64 ; 0x40\n", + " d6: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " da: f1bc 0f00 cmp.w ip, #0\n", + " de: bf0c ite eq\n", + " e0: 2000 moveq r0, #0\n", + " e2: 4668 movne r0, sp\n", + " e4: f1bc 0f00 cmp.w ip, #0\n", + " e8: bf0c ite eq\n", + " ea: 2000 moveq r0, #0\n", + " ec: f20d 4001 addwne r0, sp, #1025 ; 0x401\n", + " f0: f1bc 0f00 cmp.w ip, #0\n", + " f4: bf18 it ne\n", + " f6: f20d 4c01 addwne ip, sp, #1025 ; 0x401\n", + " fa: f8d9 c084 ldr.w ip, [r9, #132] ; 0x84\n", + " fe: f1bc 0f00 cmp.w ip, #0\n", + " 102: d171 bne.n 1e8 <VixlJniHelpers+0x1e8>\n", + " 104: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 108: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 10c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 110: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 114: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 118: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", 
+ " 11c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 120: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 124: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 128: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 12c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 130: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 134: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 138: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 13c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 140: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 144: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 148: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 14c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 150: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 154: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 158: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 15c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 160: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 164: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 168: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 16c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 170: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 174: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 178: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 17c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 180: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 184: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 188: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 18c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 190: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 194: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 198: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 19c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1a0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1a4: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1a8: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1ac: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1b0: f8cd c7ff str.w ip, [sp, 
#2047] ; 0x7ff\n", + " 1b4: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1b8: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1bc: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1c0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1c4: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1c8: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1cc: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1d0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1d4: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1d8: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1dc: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1e0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1e4: f000 b802 b.w 1ec <VixlJniHelpers+0x1ec>\n", + " 1e8: f000 b818 b.w 21c <VixlJniHelpers+0x21c>\n", + " 1ec: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1f0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1f4: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1f8: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1fc: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 200: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 204: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 208: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 20c: f50d 5d80 add.w sp, sp, #4096 ; 0x1000\n", + " 210: b008 add sp, #32\n", + " 212: b009 add sp, #36 ; 0x24\n", + " 214: ecbd 8a10 vpop {s16-s31}\n", + " 218: e8bd 8de0 ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n", + " 21c: 4660 mov r0, ip\n", + " 21e: f8d9 c2b0 ldr.w ip, [r9, #688] ; 0x2b0\n", + " 222: 47e0 blx ip\n", nullptr }; diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h index ac5481336b..c06e9cadcc 100644 --- a/compiler/utils/dedupe_set-inl.h +++ b/compiler/utils/dedupe_set-inl.h @@ -23,10 +23,11 @@ #include <inttypes.h> #include <unordered_map> +#include "android-base/stringprintf.h" + #include "base/mutex.h" #include "base/hash_set.h" #include "base/stl_util.h" -#include "base/stringprintf.h" #include "base/time_utils.h" namespace art { @@ -238,13 +239,13 
@@ std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpS for (HashType shard = 0; shard < kShard; ++shard) { shards_[shard]->UpdateStats(self, &stats); } - return StringPrintf("%zu collisions, %zu max hash collisions, " - "%zu/%zu probe distance, %" PRIu64 " ns hash time", - stats.collision_sum, - stats.collision_max, - stats.total_probe_distance, - stats.total_size, - hash_time_); + return android::base::StringPrintf("%zu collisions, %zu max hash collisions, " + "%zu/%zu probe distance, %" PRIu64 " ns hash time", + stats.collision_sum, + stats.collision_max, + stats.total_probe_distance, + stats.total_size, + hash_time_); } diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 1a21df939e..5906a71b38 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -35,6 +35,7 @@ void Mips64Assembler::FinalizeCode() { for (auto& exception_block : exception_blocks_) { EmitExceptionPoll(&exception_block); } + EmitLiterals(); PromoteBranches(); } @@ -318,6 +319,18 @@ void Mips64Assembler::Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size) { EmitR(0x1f, rs, rt, static_cast<GpuRegister>(pos + size - 33), pos - 32, 0x6); } +void Mips64Assembler::Lsa(GpuRegister rd, GpuRegister rs, GpuRegister rt, int saPlusOne) { + CHECK(1 <= saPlusOne && saPlusOne <= 4) << saPlusOne; + int sa = saPlusOne - 1; + EmitR(0x0, rs, rt, rd, sa, 0x05); +} + +void Mips64Assembler::Dlsa(GpuRegister rd, GpuRegister rs, GpuRegister rt, int saPlusOne) { + CHECK(1 <= saPlusOne && saPlusOne <= 4) << saPlusOne; + int sa = saPlusOne - 1; + EmitR(0x0, rs, rt, rd, sa, 0x15); +} + void Mips64Assembler::Wsbh(GpuRegister rd, GpuRegister rt) { EmitRtd(0x1f, rt, rd, 2, 0x20); } @@ -450,6 +463,21 @@ void Mips64Assembler::Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0x27, rs, rt, imm16); } +void Mips64Assembler::Lwpc(GpuRegister rs, uint32_t imm19) { + 
CHECK(IsUint<19>(imm19)) << imm19; + EmitI21(0x3B, rs, (0x01 << 19) | imm19); +} + +void Mips64Assembler::Lwupc(GpuRegister rs, uint32_t imm19) { + CHECK(IsUint<19>(imm19)) << imm19; + EmitI21(0x3B, rs, (0x02 << 19) | imm19); +} + +void Mips64Assembler::Ldpc(GpuRegister rs, uint32_t imm18) { + CHECK(IsUint<18>(imm18)) << imm18; + EmitI21(0x3B, rs, (0x06 << 18) | imm18); +} + void Mips64Assembler::Lui(GpuRegister rt, uint16_t imm16) { EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16); } @@ -548,6 +576,10 @@ void Mips64Assembler::Bc(uint32_t imm26) { EmitI26(0x32, imm26); } +void Mips64Assembler::Balc(uint32_t imm26) { + EmitI26(0x3A, imm26); +} + void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) { EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16); } @@ -1064,19 +1096,37 @@ void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBit type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type; } -void Mips64Assembler::Branch::InitializeType(bool is_call) { +void Mips64Assembler::Branch::InitializeType(Type initial_type) { OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); - if (is_call) { - InitShortOrLong(offset_size, kCall, kLongCall); - } else if (condition_ == kUncond) { - InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); - } else { - if (condition_ == kCondEQZ || condition_ == kCondNEZ) { - // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. - type_ = (offset_size <= kOffset23) ? 
kCondBranch : kLongCondBranch; - } else { - InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); - } + switch (initial_type) { + case kLabel: + case kLiteral: + case kLiteralUnsigned: + case kLiteralLong: + CHECK(!IsResolved()); + type_ = initial_type; + break; + case kCall: + InitShortOrLong(offset_size, kCall, kLongCall); + break; + case kCondBranch: + switch (condition_) { + case kUncond: + InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + break; + case kCondEQZ: + case kCondNEZ: + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch; + break; + default: + InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + break; + } + break; + default: + LOG(FATAL) << "Unexpected branch type " << initial_type; + UNREACHABLE(); } old_type_ = type_; } @@ -1109,14 +1159,14 @@ bool Mips64Assembler::Branch::IsUncond(BranchCondition condition, } } -Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target) +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, bool is_call) : old_location_(location), location_(location), target_(target), lhs_reg_(ZERO), rhs_reg_(ZERO), condition_(kUncond) { - InitializeType(false); + InitializeType(is_call ? kCall : kCondBranch); } Mips64Assembler::Branch::Branch(uint32_t location, @@ -1164,19 +1214,18 @@ Mips64Assembler::Branch::Branch(uint32_t location, // Branch condition is always true, make the branch unconditional. 
condition_ = kUncond; } - InitializeType(false); + InitializeType(kCondBranch); } -Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg) +Mips64Assembler::Branch::Branch(uint32_t location, GpuRegister dest_reg, Type label_or_literal_type) : old_location_(location), location_(location), - target_(target), - lhs_reg_(indirect_reg), + target_(kUnresolved), + lhs_reg_(dest_reg), rhs_reg_(ZERO), condition_(kUncond) { - CHECK_NE(indirect_reg, ZERO); - CHECK_NE(indirect_reg, AT); - InitializeType(true); + CHECK_NE(dest_reg, ZERO); + InitializeType(label_or_literal_type); } Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition( @@ -1278,11 +1327,23 @@ bool Mips64Assembler::Branch::IsLong() const { case kUncondBranch: case kCondBranch: case kCall: + // Near label. + case kLabel: + // Near literals. + case kLiteral: + case kLiteralUnsigned: + case kLiteralLong: return false; // Long branches. case kLongUncondBranch: case kLongCondBranch: case kLongCall: + // Far label. + case kFarLabel: + // Far literals. + case kFarLiteral: + case kFarLiteralUnsigned: + case kFarLiteralLong: return true; } UNREACHABLE(); @@ -1351,6 +1412,20 @@ void Mips64Assembler::Branch::PromoteToLong() { case kCall: type_ = kLongCall; break; + // Near label. + case kLabel: + type_ = kFarLabel; + break; + // Near literals. + case kLiteral: + type_ = kFarLiteral; + break; + case kLiteralUnsigned: + type_ = kFarLiteralUnsigned; + break; + case kLiteralLong: + type_ = kFarLiteralLong; + break; default: // Note: 'type_' is already long. break; @@ -1397,7 +1472,15 @@ uint32_t Mips64Assembler::Branch::GetOffset() const { uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize()); // Calculate the byte distance between instructions and also account for // different PC-relative origins. 
- uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t); + uint32_t offset_location = GetOffsetLocation(); + if (type_ == kLiteralLong) { + // Special case for the ldpc instruction, whose address (PC) is rounded down to + // a multiple of 8 before adding the offset. + // Note, branch promotion has already taken care of aligning `target_` to an + // address that's a multiple of 8. + offset_location = RoundDown(offset_location, sizeof(uint64_t)); + } + uint32_t offset = target_ - offset_location - branch_info_[type_].pc_org * sizeof(uint32_t); // Prepare the offset for encoding into the instruction(s). offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift; return offset; @@ -1444,7 +1527,7 @@ void Mips64Assembler::Bind(Mips64Label* label) { label->BindTo(bound_pc); } -uint32_t Mips64Assembler::GetLabelLocation(Mips64Label* label) const { +uint32_t Mips64Assembler::GetLabelLocation(const Mips64Label* label) const { CHECK(label->IsBound()); uint32_t target = label->Position(); if (label->prev_branch_id_plus_one_) { @@ -1500,7 +1583,7 @@ void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) { void Mips64Assembler::Buncond(Mips64Label* label) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(buffer_.Size(), target); + branches_.emplace_back(buffer_.Size(), target, /* is_call */ false); FinalizeLabeledBranch(label); } @@ -1517,12 +1600,87 @@ void Mips64Assembler::Bcond(Mips64Label* label, FinalizeLabeledBranch(label); } -void Mips64Assembler::Call(Mips64Label* label, GpuRegister indirect_reg) { +void Mips64Assembler::Call(Mips64Label* label) { uint32_t target = label->IsBound() ? 
GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(buffer_.Size(), target, indirect_reg); + branches_.emplace_back(buffer_.Size(), target, /* is_call */ true); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::LoadLabelAddress(GpuRegister dest_reg, Mips64Label* label) { + // Label address loads are treated as pseudo branches since they require very similar handling. + DCHECK(!label->IsBound()); + branches_.emplace_back(buffer_.Size(), dest_reg, Branch::kLabel); FinalizeLabeledBranch(label); } +Literal* Mips64Assembler::NewLiteral(size_t size, const uint8_t* data) { + // We don't support byte and half-word literals. + if (size == 4u) { + literals_.emplace_back(size, data); + return &literals_.back(); + } else { + DCHECK_EQ(size, 8u); + long_literals_.emplace_back(size, data); + return &long_literals_.back(); + } +} + +void Mips64Assembler::LoadLiteral(GpuRegister dest_reg, + LoadOperandType load_type, + Literal* literal) { + // Literal loads are treated as pseudo branches since they require very similar handling. 
+ Branch::Type literal_type; + switch (load_type) { + case kLoadWord: + DCHECK_EQ(literal->GetSize(), 4u); + literal_type = Branch::kLiteral; + break; + case kLoadUnsignedWord: + DCHECK_EQ(literal->GetSize(), 4u); + literal_type = Branch::kLiteralUnsigned; + break; + case kLoadDoubleword: + DCHECK_EQ(literal->GetSize(), 8u); + literal_type = Branch::kLiteralLong; + break; + default: + LOG(FATAL) << "Unexpected literal load type " << load_type; + UNREACHABLE(); + } + Mips64Label* label = literal->GetLabel(); + DCHECK(!label->IsBound()); + branches_.emplace_back(buffer_.Size(), dest_reg, literal_type); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::EmitLiterals() { + if (!literals_.empty()) { + for (Literal& literal : literals_) { + Mips64Label* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK_EQ(literal.GetSize(), 4u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } + if (!long_literals_.empty()) { + // Reserve 4 bytes for potential alignment. If after the branch promotion the 64-bit + // literals don't end up 8-byte-aligned, they will be moved down 4 bytes. + Emit(0); // NOP. + for (Literal& literal : long_literals_) { + Mips64Label* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK_EQ(literal.GetSize(), 8u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } +} + void Mips64Assembler::PromoteBranches() { // Promote short branches to long as necessary. bool changed; @@ -1561,6 +1719,35 @@ void Mips64Assembler::PromoteBranches() { end = branch.GetOldLocation(); } } + + // Align 64-bit literals by moving them down by 4 bytes if needed. + // This will reduce the PC-relative distance, which should be safe for both near and far literals. 
+ if (!long_literals_.empty()) { + uint32_t first_literal_location = GetLabelLocation(long_literals_.front().GetLabel()); + size_t lit_size = long_literals_.size() * sizeof(uint64_t); + size_t buf_size = buffer_.Size(); + // 64-bit literals must be at the very end of the buffer. + CHECK_EQ(first_literal_location + lit_size, buf_size); + if (!IsAligned<sizeof(uint64_t)>(first_literal_location)) { + buffer_.Move(first_literal_location - sizeof(uint32_t), first_literal_location, lit_size); + // The 4 reserved bytes proved useless, reduce the buffer size. + buffer_.Resize(buf_size - sizeof(uint32_t)); + // Reduce target addresses in literal and address loads by 4 bytes in order for correct + // offsets from PC to be generated. + for (auto& branch : branches_) { + uint32_t target = branch.GetTarget(); + if (target >= first_literal_location) { + branch.Resolve(target - sizeof(uint32_t)); + } + } + // If after this we ever call GetLabelLocation() to get the location of a 64-bit literal, + // we need to adjust the location of the literal's label as well. + for (Literal& literal : long_literals_) { + // Bound label's position is negative, hence incrementing it instead of decrementing. + literal.GetLabel()->position_ += sizeof(uint32_t); + } + } + } } // Note: make sure branch_info_[] and EmitBranch() are kept synchronized. @@ -1569,11 +1756,23 @@ const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[ { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kUncondBranch { 2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kCondBranch // Exception: kOffset23 for beqzc/bnezc - { 2, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kCall + { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kCall + // Near label. + { 1, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kLabel + // Near literals. 
+ { 1, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kLiteral + { 1, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kLiteralUnsigned + { 1, 0, 0, Mips64Assembler::Branch::kOffset21, 3 }, // kLiteralLong // Long branches. { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongUncondBranch { 3, 1, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCondBranch - { 3, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCall + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCall + // Far label. + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kFarLabel + // Far literals. + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kFarLiteral + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kFarLiteralUnsigned + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kFarLiteralLong }; // Note: make sure branch_info_[] and EmitBranch() are kept synchronized. @@ -1597,8 +1796,26 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { break; case Branch::kCall: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Balc(offset); + break; + + // Near label. + case Branch::kLabel: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Addiupc(lhs, offset); - Jialc(lhs, 0); + break; + // Near literals. + case Branch::kLiteral: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lwpc(lhs, offset); + break; + case Branch::kLiteralUnsigned: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lwupc(lhs, offset); + break; + case Branch::kLiteralLong: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Ldpc(lhs, offset); break; // Long branches. @@ -1616,11 +1833,37 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { Jic(AT, Low16Bits(offset)); break; case Branch::kLongCall: + offset += (offset & 0x8000) << 1; // Account for sign extension in jialc. 
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jialc(AT, Low16Bits(offset)); + break; + + // Far label. + case Branch::kFarLabel: offset += (offset & 0x8000) << 1; // Account for sign extension in daddiu. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Auipc(lhs, High16Bits(offset)); - Daddiu(lhs, lhs, Low16Bits(offset)); - Jialc(lhs, 0); + Auipc(AT, High16Bits(offset)); + Daddiu(lhs, AT, Low16Bits(offset)); + break; + // Far literals. + case Branch::kFarLiteral: + offset += (offset & 0x8000) << 1; // Account for sign extension in lw. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Lw(lhs, AT, Low16Bits(offset)); + break; + case Branch::kFarLiteralUnsigned: + offset += (offset & 0x8000) << 1; // Account for sign extension in lwu. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Lwu(lhs, AT, Low16Bits(offset)); + break; + case Branch::kFarLiteralLong: + offset += (offset & 0x8000) << 1; // Account for sign extension in ld. 
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Ld(lhs, AT, Low16Bits(offset)); break; } CHECK_EQ(overwrite_location_, branch->GetEndLocation()); @@ -1631,8 +1874,8 @@ void Mips64Assembler::Bc(Mips64Label* label) { Buncond(label); } -void Mips64Assembler::Jialc(Mips64Label* label, GpuRegister indirect_reg) { - Call(label, indirect_reg); +void Mips64Assembler::Balc(Mips64Label* label) { + Call(label); } void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 238cb9d765..7ef5ab0d39 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -17,9 +17,11 @@ #ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ #define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ +#include <deque> #include <utility> #include <vector> +#include "base/arena_containers.h" #include "base/enums.h" #include "base/macros.h" #include "constants_mips64.h" @@ -312,6 +314,49 @@ class Mips64Label : public Label { DISALLOW_COPY_AND_ASSIGN(Mips64Label); }; +// Assembler literal is a value embedded in code, retrieved using a PC-relative load. 
+class Literal { + public: + static constexpr size_t kMaxSize = 8; + + Literal(uint32_t size, const uint8_t* data) + : label_(), size_(size) { + DCHECK_LE(size, Literal::kMaxSize); + memcpy(data_, data, size); + } + + template <typename T> + T GetValue() const { + DCHECK_EQ(size_, sizeof(T)); + T value; + memcpy(&value, data_, sizeof(T)); + return value; + } + + uint32_t GetSize() const { + return size_; + } + + const uint8_t* GetData() const { + return data_; + } + + Mips64Label* GetLabel() { + return &label_; + } + + const Mips64Label* GetLabel() const { + return &label_; + } + + private: + Mips64Label label_; + const uint32_t size_; + uint8_t data_[kMaxSize]; + + DISALLOW_COPY_AND_ASSIGN(Literal); +}; + // Slowpath entered when Thread::Current()->_exception is non-null. class Mips64ExceptionSlowPath { public: @@ -341,6 +386,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer : Assembler(arena), overwriting_(false), overwrite_location_(0), + literals_(arena->Adapter(kArenaAllocAssembler)), + long_literals_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0), last_old_position_(0), last_branch_id_(0) { @@ -386,18 +433,20 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt); void Bitswap(GpuRegister rd, GpuRegister rt); - void Dbitswap(GpuRegister rd, GpuRegister rt); + void Dbitswap(GpuRegister rd, GpuRegister rt); // MIPS64 void Seb(GpuRegister rd, GpuRegister rt); void Seh(GpuRegister rd, GpuRegister rt); - void Dsbh(GpuRegister rd, GpuRegister rt); - void Dshd(GpuRegister rd, GpuRegister rt); + void Dsbh(GpuRegister rd, GpuRegister rt); // MIPS64 + void Dshd(GpuRegister rd, GpuRegister rt); // MIPS64 void Dext(GpuRegister rs, GpuRegister rt, int pos, int size); // MIPS64 void Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size); // MIPS64 + void Lsa(GpuRegister rd, GpuRegister rs, GpuRegister rt, int saPlusOne); + void 
Dlsa(GpuRegister rd, GpuRegister rs, GpuRegister rt, int saPlusOne); // MIPS64 void Wsbh(GpuRegister rd, GpuRegister rt); void Sc(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); - void Scd(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); + void Scd(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); // MIPS64 void Ll(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); - void Lld(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); + void Lld(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); // MIPS64 void Sll(GpuRegister rd, GpuRegister rt, int shamt); void Srl(GpuRegister rd, GpuRegister rt, int shamt); @@ -409,7 +458,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Srav(GpuRegister rd, GpuRegister rt, GpuRegister rs); void Dsll(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 void Dsrl(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 - void Drotr(GpuRegister rd, GpuRegister rt, int shamt); + void Drotr(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 void Dsra(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 void Dsll32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 void Dsrl32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 @@ -427,6 +476,9 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64 + void Lwpc(GpuRegister rs, uint32_t imm19); + void Lwupc(GpuRegister rs, uint32_t imm19); // MIPS64 + void Ldpc(GpuRegister rs, uint32_t imm18); // MIPS64 void Lui(GpuRegister rt, uint16_t imm16); void Dahi(GpuRegister rs, uint16_t imm16); // MIPS64 void Dati(GpuRegister rs, uint16_t imm16); // MIPS64 @@ -445,8 +497,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Selnez(GpuRegister rd, GpuRegister rs, GpuRegister rt); void 
Clz(GpuRegister rd, GpuRegister rs); void Clo(GpuRegister rd, GpuRegister rs); - void Dclz(GpuRegister rd, GpuRegister rs); - void Dclo(GpuRegister rd, GpuRegister rs); + void Dclz(GpuRegister rd, GpuRegister rs); // MIPS64 + void Dclo(GpuRegister rd, GpuRegister rs); // MIPS64 void Jalr(GpuRegister rd, GpuRegister rs); void Jalr(GpuRegister rs); @@ -454,6 +506,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Auipc(GpuRegister rs, uint16_t imm16); void Addiupc(GpuRegister rs, uint32_t imm19); void Bc(uint32_t imm26); + void Balc(uint32_t imm26); void Jic(GpuRegister rt, uint16_t imm16); void Jialc(GpuRegister rt, uint16_t imm16); void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16); @@ -605,8 +658,26 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer UNREACHABLE(); } + // Create a new literal with a given value. + // NOTE: Force the template parameter to be explicitly specified. + template <typename T> + Literal* NewLiteral(typename Identity<T>::type value) { + static_assert(std::is_integral<T>::value, "T must be an integral type."); + return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); + } + + // Load label address using PC-relative loads. To be used with data labels in the literal / + // jump table area only and not with regular code labels. + void LoadLabelAddress(GpuRegister dest_reg, Mips64Label* label); + + // Create a new literal with the given data. + Literal* NewLiteral(size_t size, const uint8_t* data); + + // Load literal using PC-relative loads. 
+ void LoadLiteral(GpuRegister dest_reg, LoadOperandType load_type, Literal* literal); + void Bc(Mips64Label* label); - void Jialc(Mips64Label* label, GpuRegister indirect_reg); + void Balc(Mips64Label* label); void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label); void Bltzc(GpuRegister rt, Mips64Label* label); void Bgtzc(GpuRegister rt, Mips64Label* label); @@ -756,12 +827,15 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS64, // must be used instead of Mips64Label::GetPosition()). - uint32_t GetLabelLocation(Mips64Label* label) const; + uint32_t GetLabelLocation(const Mips64Label* label) const; // Get the final position of a label after local fixup based on the old position // recorded before FinalizeCode(). uint32_t GetAdjustedPosition(uint32_t old_position); + // Note that PC-relative literal loads are handled as pseudo branches because they need very + // similar relocation and may similarly expand in size to accommodate larger offsets relative + // to PC. enum BranchCondition { kCondLT, kCondGE, @@ -791,10 +865,22 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer kUncondBranch, kCondBranch, kCall, + // Near label. + kLabel, + // Near literals. + kLiteral, + kLiteralUnsigned, + kLiteralLong, // Long branches. kLongUncondBranch, kLongCondBranch, kLongCall, + // Far label. + kFarLabel, + // Far literals. + kFarLiteral, + kFarLiteralUnsigned, + kFarLiteralLong, }; // Bit sizes of offsets defined as enums to minimize chance of typos. @@ -830,16 +916,16 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer }; static const BranchInfo branch_info_[/* Type */]; - // Unconditional branch. - Branch(uint32_t location, uint32_t target); + // Unconditional branch or call. + Branch(uint32_t location, uint32_t target, bool is_call); // Conditional branch. 
Branch(uint32_t location, uint32_t target, BranchCondition condition, GpuRegister lhs_reg, - GpuRegister rhs_reg = ZERO); - // Call (branch and link) that stores the target address in a given register (i.e. T9). - Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg); + GpuRegister rhs_reg); + // Label address (in literal area) or literal. + Branch(uint32_t location, GpuRegister dest_reg, Type label_or_literal_type); // Some conditional branches with lhs = rhs are effectively NOPs, while some // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs. @@ -923,7 +1009,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer private: // Completes branch construction by determining and recording its type. - void InitializeType(bool is_call); + void InitializeType(Type initial_type); // Helper for the above. void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); @@ -932,7 +1018,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer uint32_t target_; // Offset into assembler buffer in bytes. GpuRegister lhs_reg_; // Left-hand side register in conditional branches or - // indirect call register. + // destination register in literals. GpuRegister rhs_reg_; // Right-hand side register in conditional branches. BranchCondition condition_; // Condition for conditional branches. 
@@ -957,12 +1043,13 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer BranchCondition condition, GpuRegister lhs, GpuRegister rhs = ZERO); - void Call(Mips64Label* label, GpuRegister indirect_reg); + void Call(Mips64Label* label); void FinalizeLabeledBranch(Mips64Label* label); Branch* GetBranch(uint32_t branch_id); const Branch* GetBranch(uint32_t branch_id) const; + void EmitLiterals(); void PromoteBranches(); void EmitBranch(Branch* branch); void EmitBranches(); @@ -981,6 +1068,11 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer // The current overwrite location. uint32_t overwrite_location_; + // Use std::deque<> for literal labels to allow insertions at the end + // without invalidating pointers and references to existing elements. + ArenaDeque<Literal> literals_; + ArenaDeque<Literal> long_literals_; // 64-bit literals separated for alignment reasons. + // Data for AdjustedPosition(), see the description there. uint32_t last_position_adjustment_; uint32_t last_old_position_; diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index ba8f25ea77..564559f92c 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -576,83 +576,83 @@ TEST_F(AssemblerMIPS64Test, Jalr) { RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); } -TEST_F(AssemblerMIPS64Test, Jialc) { +TEST_F(AssemblerMIPS64Test, Balc) { mips64::Mips64Label label1, label2; - __ Jialc(&label1, mips64::T9); + __ Balc(&label1); constexpr size_t kAdduCount1 = 63; for (size_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); } __ Bind(&label1); - __ Jialc(&label2, mips64::T9); + __ Balc(&label2); constexpr size_t kAdduCount2 = 64; for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); } __ Bind(&label2); - __ Jialc(&label1, 
mips64::T9); + __ Balc(&label1); std::string expected = ".set noreorder\n" - "lapc $t9, 1f\n" - "jialc $t9, 0\n" + + "balc 1f\n" + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + "1:\n" - "lapc $t9, 2f\n" - "jialc $t9, 0\n" + + "balc 2f\n" + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + "2:\n" - "lapc $t9, 1b\n" - "jialc $t9, 0\n"; - DriverStr(expected, "Jialc"); + "balc 1b\n"; + DriverStr(expected, "Balc"); } -TEST_F(AssemblerMIPS64Test, LongJialc) { +TEST_F(AssemblerMIPS64Test, LongBalc) { + constexpr uint32_t kNopCount1 = (1u << 25) + 1; + constexpr uint32_t kNopCount2 = (1u << 25) + 1; + constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u; + ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity); + __ GetBuffer()->ExtendCapacity(kRequiredCapacity); mips64::Mips64Label label1, label2; - __ Jialc(&label1, mips64::T9); - constexpr uint32_t kAdduCount1 = (1u << 18) + 1; - for (uint32_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + __ Balc(&label1); + for (uint32_t i = 0; i != kNopCount1; ++i) { + __ Nop(); } __ Bind(&label1); - __ Jialc(&label2, mips64::T9); - constexpr uint32_t kAdduCount2 = (1u << 18) + 1; - for (uint32_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + __ Balc(&label2); + for (uint32_t i = 0; i != kNopCount2; ++i) { + __ Nop(); } __ Bind(&label2); - __ Jialc(&label1, mips64::T9); + __ Balc(&label1); - uint32_t offset_forward1 = 3 + kAdduCount1; // 3: account for auipc, daddiu and jic. + uint32_t offset_forward1 = 2 + kNopCount1; // 2: account for auipc and jialc. offset_forward1 <<= 2; - offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in daddiu. + offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in jialc. - uint32_t offset_forward2 = 3 + kAdduCount2; // 3: account for auipc, daddiu and jic. 
+ uint32_t offset_forward2 = 2 + kNopCount2; // 2: account for auipc and jialc. offset_forward2 <<= 2; - offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in daddiu. + offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in jialc. - uint32_t offset_back = -(3 + kAdduCount2); // 3: account for auipc, daddiu and jic. + uint32_t offset_back = -(2 + kNopCount2); // 2: account for auipc and jialc. offset_back <<= 2; - offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in daddiu. + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jialc. + // Note, we're using the ".fill" directive to tell the assembler to generate many NOPs + // instead of generating them ourselves in the source code. This saves a few minutes + // of test time. std::ostringstream oss; oss << ".set noreorder\n" - "auipc $t9, 0x" << std::hex << High16Bits(offset_forward1) << "\n" - "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward1) << "\n" - "jialc $t9, 0\n" << - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "auipc $at, 0x" << std::hex << High16Bits(offset_forward1) << "\n" + "jialc $at, 0x" << std::hex << Low16Bits(offset_forward1) << "\n" + ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n" "1:\n" - "auipc $t9, 0x" << std::hex << High16Bits(offset_forward2) << "\n" - "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward2) << "\n" - "jialc $t9, 0\n" << - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "auipc $at, 0x" << std::hex << High16Bits(offset_forward2) << "\n" + "jialc $at, 0x" << std::hex << Low16Bits(offset_forward2) << "\n" + ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n" "2:\n" - "auipc $t9, 0x" << std::hex << High16Bits(offset_back) << "\n" - "daddiu $t9, 0x" << std::hex << Low16Bits(offset_back) << "\n" - "jialc $t9, 0\n"; + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jialc $at, 0x" << std::hex << Low16Bits(offset_back) 
<< "\n"; std::string expected = oss.str(); - DriverStr(expected, "LongJialc"); + DriverStr(expected, "LongBalc"); } TEST_F(AssemblerMIPS64Test, Bc) { @@ -827,6 +827,258 @@ TEST_F(AssemblerMIPS64Test, LongBeqc) { // MISC // ////////// +TEST_F(AssemblerMIPS64Test, Lwpc) { + // Lwpc() takes an unsigned 19-bit immediate, while the GNU assembler needs a signed offset, + // hence the sign extension from bit 18 with `imm - ((imm & 0x40000) << 1)`. + // The GNU assembler also wants the offset to be a multiple of 4, which it will shift right + // by 2 positions when encoding, hence `<< 2` to compensate for that shift. + // We capture the value of the immediate with `.set imm, {imm}` because the value is needed + // twice for the sign extension, but `{imm}` is substituted only once. + const char* code = ".set imm, {imm}\nlw ${reg}, ((imm - ((imm & 0x40000) << 1)) << 2)($pc)"; + DriverStr(RepeatRIb(&mips64::Mips64Assembler::Lwpc, 19, code), "Lwpc"); +} + +TEST_F(AssemblerMIPS64Test, Lwupc) { + // The comment for the Lwpc test applies here as well. + const char* code = ".set imm, {imm}\nlwu ${reg}, ((imm - ((imm & 0x40000) << 1)) << 2)($pc)"; + DriverStr(RepeatRIb(&mips64::Mips64Assembler::Lwupc, 19, code), "Lwupc"); +} + +TEST_F(AssemblerMIPS64Test, Ldpc) { + // The comment for the Lwpc test applies here as well. 
+ const char* code = ".set imm, {imm}\nld ${reg}, ((imm - ((imm & 0x20000) << 1)) << 3)($pc)"; + DriverStr(RepeatRIb(&mips64::Mips64Assembler::Ldpc, 18, code), "Ldpc"); +} + +TEST_F(AssemblerMIPS64Test, LoadFarthestNearLabelAddress) { + mips64::Mips64Label label; + __ LoadLabelAddress(mips64::V0, &label); + constexpr uint32_t kAdduCount = 0x3FFDE; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + + std::string expected = + "lapc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n"; + DriverStr(expected, "LoadFarthestNearLabelAddress"); + EXPECT_EQ(__ GetLabelLocation(&label), (1 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadNearestFarLabelAddress) { + mips64::Mips64Label label; + __ LoadLabelAddress(mips64::V0, &label); + constexpr uint32_t kAdduCount = 0x3FFDF; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "daddiu $v0, $at, %lo(2f - 1b)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n"; + DriverStr(expected, "LoadNearestFarLabelAddress"); + EXPECT_EQ(__ GetLabelLocation(&label), (2 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteral) { + mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal); + constexpr uint32_t kAdduCount = 0x3FFDE; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "lwpc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteral"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteral) { + mips64::Literal* literal = __ 
NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal); + constexpr uint32_t kAdduCount = 0x3FFDF; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteral"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralUnsigned) { + mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal); + constexpr uint32_t kAdduCount = 0x3FFDE; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "lwupc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteralUnsigned"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralUnsigned) { + mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal); + constexpr uint32_t kAdduCount = 0x3FFDF; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "lwu $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteralUnsigned"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralLong) { + mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + __ 
LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal); + constexpr uint32_t kAdduCount = 0x3FFDD; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "ldpc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".dword 0x0123456789ABCDEF\n"; + DriverStr(expected, "LoadFarthestNearLiteralLong"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralLong) { + mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal); + constexpr uint32_t kAdduCount = 0x3FFDE; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "ld $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".dword 0x0123456789ABCDEF\n"; + DriverStr(expected, "LoadNearestFarLiteralLong"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNop) { + mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555)); + mips64::Literal* literal3 = __ NewLiteral<uint64_t>(UINT64_C(0xAAAAAAAAAAAAAAAA)); + __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1); + __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2); + __ LoadLiteral(mips64::A3, mips64::kLoadDoubleword, literal3); + __ LoadLabelAddress(mips64::V0, literal1->GetLabel()); + __ LoadLabelAddress(mips64::V1, literal2->GetLabel()); + // A nop will be inserted here before the 64-bit literals. 
+ + std::string expected = + "ldpc $a1, 1f\n" + // The GNU assembler incorrectly requires the ldpc instruction to be located + // at an address that's a multiple of 8. TODO: Remove this workaround if/when + // the assembler is fixed. + // "ldpc $a2, 2f\n" + ".word 0xECD80004\n" + "ldpc $a3, 3f\n" + "lapc $v0, 1f\n" + "lapc $v1, 2f\n" + "nop\n" + "1:\n" + ".dword 0x0123456789ABCDEF\n" + "2:\n" + ".dword 0x5555555555555555\n" + "3:\n" + ".dword 0xAAAAAAAAAAAAAAAA\n"; + DriverStr(expected, "LongLiteralAlignmentNop"); + EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 6 * 4u); + EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 8 * 4u); + EXPECT_EQ(__ GetLabelLocation(literal3->GetLabel()), 10 * 4u); +} + +TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNoNop) { + mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555)); + __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1); + __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2); + __ LoadLabelAddress(mips64::V0, literal1->GetLabel()); + __ LoadLabelAddress(mips64::V1, literal2->GetLabel()); + + std::string expected = + "ldpc $a1, 1f\n" + // The GNU assembler incorrectly requires the ldpc instruction to be located + // at an address that's a multiple of 8. TODO: Remove this workaround if/when + // the assembler is fixed. 
+ // "ldpc $a2, 2f\n" + ".word 0xECD80003\n" + "lapc $v0, 1f\n" + "lapc $v1, 2f\n" + "1:\n" + ".dword 0x0123456789ABCDEF\n" + "2:\n" + ".dword 0x5555555555555555\n"; + DriverStr(expected, "LongLiteralAlignmentNoNop"); + EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 4 * 4u); + EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 6 * 4u); +} + +TEST_F(AssemblerMIPS64Test, FarLongLiteralAlignmentNop) { + mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal); + __ LoadLabelAddress(mips64::V1, literal->GetLabel()); + constexpr uint32_t kAdduCount = 0x3FFDF; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + // A nop will be inserted here before the 64-bit literal. + + std::string expected = + "1:\n" + "auipc $at, %hi(3f - 1b)\n" + "ld $v0, %lo(3f - 1b)($at)\n" + "2:\n" + "auipc $at, %hi(3f - 2b)\n" + "daddiu $v1, $at, %lo(3f - 2b)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "nop\n" + "3:\n" + ".dword 0x0123456789ABCDEF\n"; + DriverStr(expected, "FarLongLiteralAlignmentNop"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (5 + kAdduCount) * 4); +} + TEST_F(AssemblerMIPS64Test, Bitswap) { DriverStr(RepeatRR(&mips64::Mips64Assembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap"); } @@ -889,6 +1141,22 @@ TEST_F(AssemblerMIPS64Test, Dinsu) { DriverStr(expected.str(), "Dinsu"); } +TEST_F(AssemblerMIPS64Test, Lsa) { + DriverStr(RepeatRRRIb(&mips64::Mips64Assembler::Lsa, + 2, + "lsa ${reg1}, ${reg2}, ${reg3}, {imm}", + 1), + "lsa"); +} + +TEST_F(AssemblerMIPS64Test, Dlsa) { + DriverStr(RepeatRRRIb(&mips64::Mips64Assembler::Dlsa, + 2, + "dlsa ${reg1}, ${reg2}, ${reg3}, {imm}", + 1), + "dlsa"); +} + TEST_F(AssemblerMIPS64Test, Wsbh) { DriverStr(RepeatRR(&mips64::Mips64Assembler::Wsbh, "wsbh ${reg1}, ${reg2}"), "wsbh"); } |