diff options
Diffstat (limited to 'compiler/utils')
| -rw-r--r-- | compiler/utils/arm/assembler_arm.h | 14 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_arm32.cc | 1 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_arm32.h | 2 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_thumb2.cc | 101 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_thumb2.h | 2 | ||||
| -rw-r--r-- | compiler/utils/assembler_test.h | 80 | ||||
| -rw-r--r-- | compiler/utils/assembler_thumb_test.cc | 260 | ||||
| -rw-r--r-- | compiler/utils/assembler_thumb_test_expected.cc.inc | 335 | ||||
| -rw-r--r-- | compiler/utils/mips/assembler_mips.cc | 328 | ||||
| -rw-r--r-- | compiler/utils/mips/assembler_mips.h | 182 | ||||
| -rw-r--r-- | compiler/utils/mips/assembler_mips_test.cc | 321 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64.cc | 1119 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64.h | 413 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64_test.cc | 438 |
14 files changed, 3061 insertions, 535 deletions
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index 4a6e6d7c3f..b79c2f0f4e 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -22,6 +22,7 @@ #include "base/bit_utils.h" #include "base/logging.h" +#include "base/stl_util.h" #include "base/value_object.h" #include "constants_arm.h" #include "utils/arm/managed_register_arm.h" @@ -697,10 +698,9 @@ class ArmAssembler : public Assembler { // Most of these are pure virtual as they need to be implemented per instruction set. // Create a new literal with a given value. - // NOTE: Force the template parameter to be explicitly specified. In the absence of - // std::omit_from_type_deduction<T> or std::identity<T>, use std::decay<T>. + // NOTE: Force the template parameter to be explicitly specified. template <typename T> - Literal* NewLiteral(typename std::decay<T>::type value) { + Literal* NewLiteral(typename Identity<T>::type value) { static_assert(std::is_integral<T>::value, "T must be an integral type."); return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); } @@ -878,7 +878,15 @@ class ArmAssembler : public Assembler { Register rn, Opcode opcode, uint32_t immediate, + SetCc set_cc, ShifterOperand* shifter_op) = 0; + bool ShifterOperandCanHold(Register rd, + Register rn, + Opcode opcode, + uint32_t immediate, + ShifterOperand* shifter_op) { + return ShifterOperandCanHold(rd, rn, opcode, immediate, kCcDontCare, shifter_op); + } virtual bool ShifterOperandCanAlwaysHold(uint32_t immediate) = 0; diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index a7dbacd3a9..ebca25bbf9 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -57,6 +57,7 @@ bool Arm32Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED, Register rn ATTRIBUTE_UNUSED, Opcode opcode ATTRIBUTE_UNUSED, uint32_t immediate, + SetCc set_cc ATTRIBUTE_UNUSED, ShifterOperand* 
shifter_op) { return ShifterOperandCanHoldArm32(immediate, shifter_op); } diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index ce3a87275d..bf332feb62 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -297,7 +297,9 @@ class Arm32Assembler FINAL : public ArmAssembler { Register rn, Opcode opcode, uint32_t immediate, + SetCc set_cc, ShifterOperand* shifter_op) OVERRIDE; + using ArmAssembler::ShifterOperandCanHold; // Don't hide the non-virtual override. bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 7ad5b440e0..f341030c15 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -500,6 +500,7 @@ bool Thumb2Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED, Register rn ATTRIBUTE_UNUSED, Opcode opcode, uint32_t immediate, + SetCc set_cc, ShifterOperand* shifter_op) { shifter_op->type_ = ShifterOperand::kImmediate; shifter_op->immed_ = immediate; @@ -508,7 +509,8 @@ bool Thumb2Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED, switch (opcode) { case ADD: case SUB: - if (immediate < (1 << 12)) { // Less than (or equal to) 12 bits can always be done. + // Less than (or equal to) 12 bits can be done if we don't need to set condition codes. + if (immediate < (1 << 12) && set_cc != kCcSet) { return true; } return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate; @@ -1239,7 +1241,10 @@ bool Thumb2Assembler::Is32BitDataProcessing(Condition cond, // The only thumb1 instructions with a register and an immediate are ADD and SUB // with a 3-bit immediate, and RSB with zero immediate. if (opcode == ADD || opcode == SUB) { - if (!IsUint<3>(so.GetImmediate())) { + if ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet) { + return true; // Cannot match "setflags". 
+ } + if (!IsUint<3>(so.GetImmediate()) && !IsUint<3>(-so.GetImmediate())) { return true; } } else { @@ -1249,8 +1254,12 @@ bool Thumb2Assembler::Is32BitDataProcessing(Condition cond, // ADD, SUB, CMP and MOV may be thumb1 only if the immediate is 8 bits. if (!(opcode == ADD || opcode == SUB || opcode == MOV || opcode == CMP)) { return true; + } else if (opcode != CMP && ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet)) { + return true; // Cannot match "setflags" for ADD, SUB or MOV. } else { - if (!IsUint<8>(so.GetImmediate())) { + // For ADD and SUB allow also negative 8-bit immediate as we will emit the opposite opcode. + if (!IsUint<8>(so.GetImmediate()) && + (opcode == MOV || opcode == CMP || !IsUint<8>(-so.GetImmediate()))) { return true; } } @@ -1602,12 +1611,18 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, uint8_t rn_shift = 3; uint8_t immediate_shift = 0; bool use_immediate = false; - uint32_t immediate = 0; // Should be at most 9 bits but keep the full immediate for CHECKs. + uint32_t immediate = 0; // Should be at most 10 bits but keep the full immediate for CHECKs. uint8_t thumb_opcode; if (so.IsImmediate()) { use_immediate = true; immediate = so.GetImmediate(); + if (!IsUint<10>(immediate)) { + // Flip ADD/SUB. + opcode = (opcode == ADD) ? SUB : ADD; + immediate = -immediate; + DCHECK(IsUint<10>(immediate)); // More stringent checks below. + } } switch (opcode) { @@ -1644,7 +1659,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, dp_opcode = 2U /* 0b10 */; thumb_opcode = 3U /* 0b11 */; opcode_shift = 12; - CHECK_LT(immediate, (1u << 9)); + CHECK(IsUint<9>(immediate)); CHECK_ALIGNED(immediate, 4); // Remove rd and rn from instruction by orring it with immed and clearing bits.
@@ -1658,7 +1673,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, dp_opcode = 2U /* 0b10 */; thumb_opcode = 5U /* 0b101 */; opcode_shift = 11; - CHECK_LT(immediate, (1u << 10)); + CHECK(IsUint<10>(immediate)); CHECK_ALIGNED(immediate, 4); // Remove rn from instruction. @@ -1668,11 +1683,13 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, immediate >>= 2; } else if (rn != rd) { // Must use T1. + CHECK(IsUint<3>(immediate)); opcode_shift = 9; thumb_opcode = 14U /* 0b01110 */; immediate_shift = 6; } else { // T2 encoding. + CHECK(IsUint<8>(immediate)); opcode_shift = 11; thumb_opcode = 6U /* 0b110 */; rd_shift = 8; @@ -1702,7 +1719,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, dp_opcode = 2U /* 0b10 */; thumb_opcode = 0x61 /* 0b1100001 */; opcode_shift = 7; - CHECK_LT(immediate, (1u << 9)); + CHECK(IsUint<9>(immediate)); CHECK_ALIGNED(immediate, 4); // Remove rd and rn from instruction by orring it with immed and clearing bits. @@ -1713,11 +1730,13 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, immediate >>= 2; } else if (rn != rd) { // Must use T1. + CHECK(IsUint<3>(immediate)); opcode_shift = 9; thumb_opcode = 15U /* 0b01111 */; immediate_shift = 6; } else { // T2 encoding. + CHECK(IsUint<8>(immediate)); opcode_shift = 11; thumb_opcode = 7U /* 0b111 */; rd_shift = 8; @@ -2569,30 +2588,19 @@ void Thumb2Assembler::clz(Register rd, Register rm, Condition cond) { void Thumb2Assembler::movw(Register rd, uint16_t imm16, Condition cond) { CheckCondition(cond); - bool must_be_32bit = force_32bit_; - if (IsHighRegister(rd)|| imm16 >= 256u) { - must_be_32bit = true; - } - - if (must_be_32bit) { - // Use encoding T3. 
- uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */; - uint32_t i = (imm16 >> 11) & 1U /* 0b1 */; - uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */; - uint32_t imm8 = imm16 & 0xff; - int32_t encoding = B31 | B30 | B29 | B28 | - B25 | B22 | - static_cast<uint32_t>(rd) << 8 | - i << 26 | - imm4 << 16 | - imm3 << 12 | - imm8; - Emit32(encoding); - } else { - int16_t encoding = B13 | static_cast<uint16_t>(rd) << 8 | - imm16; - Emit16(encoding); - } + // Always 32 bits, encoding T3. (Other encodings are called MOV, not MOVW.) + uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */; + uint32_t i = (imm16 >> 11) & 1U /* 0b1 */; + uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */; + uint32_t imm8 = imm16 & 0xff; + int32_t encoding = B31 | B30 | B29 | B28 | + B25 | B22 | + static_cast<uint32_t>(rd) << 8 | + i << 26 | + imm4 << 16 | + imm3 << 12 | + imm8; + Emit32(encoding); } @@ -3412,25 +3420,30 @@ void Thumb2Assembler::AddConstant(Register rd, Register rn, int32_t value, // positive values and sub for negatives ones, which would slightly improve // the readability of generated code for some constants. ShifterOperand shifter_op; - if (ShifterOperandCanHold(rd, rn, ADD, value, &shifter_op)) { + if (ShifterOperandCanHold(rd, rn, ADD, value, set_cc, &shifter_op)) { add(rd, rn, shifter_op, cond, set_cc); - } else if (ShifterOperandCanHold(rd, rn, SUB, -value, &shifter_op)) { + } else if (ShifterOperandCanHold(rd, rn, SUB, -value, set_cc, &shifter_op)) { sub(rd, rn, shifter_op, cond, set_cc); } else { CHECK(rn != IP); - if (ShifterOperandCanHold(rd, rn, MVN, ~value, &shifter_op)) { - mvn(IP, shifter_op, cond, kCcKeep); - add(rd, rn, ShifterOperand(IP), cond, set_cc); - } else if (ShifterOperandCanHold(rd, rn, MVN, ~(-value), &shifter_op)) { - mvn(IP, shifter_op, cond, kCcKeep); - sub(rd, rn, ShifterOperand(IP), cond, set_cc); + // If rd != rn, use rd as temp. This allows 16-bit ADD/SUB in more situations than using IP. + Register temp = (rd != rn) ?
rd : IP; + if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, set_cc, &shifter_op)) { + mvn(temp, shifter_op, cond, kCcKeep); + add(rd, rn, ShifterOperand(temp), cond, set_cc); + } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), set_cc, &shifter_op)) { + mvn(temp, shifter_op, cond, kCcKeep); + sub(rd, rn, ShifterOperand(temp), cond, set_cc); + } else if (High16Bits(-value) == 0) { + movw(temp, Low16Bits(-value), cond); + sub(rd, rn, ShifterOperand(temp), cond, set_cc); } else { - movw(IP, Low16Bits(value), cond); + movw(temp, Low16Bits(value), cond); uint16_t value_high = High16Bits(value); if (value_high != 0) { - movt(IP, value_high, cond); + movt(temp, value_high, cond); } - add(rd, rn, ShifterOperand(IP), cond, set_cc); + add(rd, rn, ShifterOperand(temp), cond, set_cc); } } } @@ -3440,9 +3453,9 @@ void Thumb2Assembler::CmpConstant(Register rn, int32_t value, Condition cond) { // positive values and sub for negatives ones, which would slightly improve // the readability of generated code for some constants. ShifterOperand shifter_op; - if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, &shifter_op)) { + if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, kCcSet, &shifter_op)) { cmp(rn, shifter_op, cond); - } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, &shifter_op)) { + } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, kCcSet, &shifter_op)) { cmn(rn, shifter_op, cond); } else { CHECK(rn != IP); diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 9aeece8e57..bf07b2dbf8 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -342,7 +342,9 @@ class Thumb2Assembler FINAL : public ArmAssembler { Register rn, Opcode opcode, uint32_t immediate, + SetCc set_cc, ShifterOperand* shifter_op) OVERRIDE; + using ArmAssembler::ShifterOperandCanHold; // Don't hide the non-virtual override. 
bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE; diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index f1233ca457..2579ddb52e 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -147,7 +147,7 @@ class AssemblerTest : public testing::Test { std::string (AssemblerTest::*GetName2)(const Reg2&), std::string fmt) { std::string str; - std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0); + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); for (auto reg1 : reg1_registers) { for (auto reg2 : reg2_registers) { @@ -188,14 +188,66 @@ class AssemblerTest : public testing::Test { return str; } - template <typename RegType, typename ImmType> - std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType), + template <typename ImmType, typename Reg1, typename Reg2> + std::string RepeatTemplatedImmBitsRegisters(void (Ass::*f)(ImmType, Reg1, Reg2), + const std::vector<Reg1*> reg1_registers, + const std::vector<Reg2*> reg2_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), int imm_bits, - const std::vector<Reg*> registers, - std::string (AssemblerTest::*GetName)(const RegType&), std::string fmt) { + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); + + WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size()); + std::string str; - std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0); + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (int64_t imm : imms) { + ImmType new_imm = CreateImmediate(imm); + (assembler_.get()->*f)(new_imm, *reg1, *reg2); + std::string base = fmt; + + std::string reg1_string = (this->*GetName1)(*reg1); + size_t reg1_index; + while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { + base.replace(reg1_index, 
ConstexprStrLen(REG1_TOKEN), reg1_string); + } + + std::string reg2_string = (this->*GetName2)(*reg2); + size_t reg2_index; + while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { + base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); + } + + size_t imm_index = base.find(IMM_TOKEN); + if (imm_index != std::string::npos) { + std::ostringstream sreg; + sreg << imm; + std::string imm_string = sreg.str(); + base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + } + } + // Add a newline at the end. + str += "\n"; + return str; + } + + template <typename RegType, typename ImmType> + std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType), + int imm_bits, + const std::vector<Reg*> registers, + std::string (AssemblerTest::*GetName)(const RegType&), + std::string fmt) { + std::string str; + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); for (auto reg : registers) { for (int64_t imm : imms) { @@ -291,6 +343,17 @@ class AssemblerTest : public testing::Test { fmt); } + template <typename ImmType> + std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg), int imm_bits, std::string fmt) { + return RepeatTemplatedImmBitsRegisters<ImmType, FPReg, FPReg>(f, + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + imm_bits, + fmt); + } + std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) { return RepeatTemplatedRegisters<FPReg, Reg>(f, GetFPRegisters(), @@ -840,12 +903,17 @@ class AssemblerTest : public testing::Test { return str; } + // Override this to pad the code with NOPs to a certain size if needed. 
+ virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) { + } + void DriverWrapper(std::string assembly_text, std::string test_name) { assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); assembler_->FinalizeInstructions(code); + Pad(*data); test_helper_->Driver(*data, assembly_text, test_name); } diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 1de51a2dc8..0ef0dc19e6 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -135,7 +135,8 @@ void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* toolsdir.c_str(), filename); if (kPrintResults) { // Print the results only, don't check. This is used to generate new output for inserting - // into the .inc file. + // into the .inc file, so let's add the appropriate prefix/suffix needed in the C++ code. + strcat(cmd, " | sed '-es/^/ \"/' | sed '-es/$/\\\\n\",/'"); int cmd_result3 = system(cmd); ASSERT_EQ(cmd_result3, 0) << strerror(errno); } else { @@ -832,11 +833,12 @@ TEST(Thumb2AssemblerTest, StoreMultiple) { TEST(Thumb2AssemblerTest, MovWMovT) { arm::Thumb2Assembler assembler; - __ movw(R4, 0); // 16 bit. - __ movw(R4, 0x34); // 16 bit. - __ movw(R9, 0x34); // 32 bit due to high register. - __ movw(R3, 0x1234); // 32 bit due to large value. - __ movw(R9, 0xffff); // 32 bit due to large value and high register. + // Always 32 bit. + __ movw(R4, 0); + __ movw(R4, 0x34); + __ movw(R9, 0x34); + __ movw(R3, 0x1234); + __ movw(R9, 0xffff); // Always 32 bit. __ movt(R0, 0); @@ -1378,6 +1380,252 @@ TEST(Thumb2AssemblerTest, CompareAndBranch) { EmitAndCheck(&assembler, "CompareAndBranch"); } +TEST(Thumb2AssemblerTest, AddConstant) { + arm::Thumb2Assembler assembler; + + // Low registers, Rd != Rn. + __ AddConstant(R0, R1, 0); // MOV. 
+ __ AddConstant(R0, R1, 1); // 16-bit ADDS, encoding T1. + __ AddConstant(R0, R1, 7); // 16-bit ADDS, encoding T1. + __ AddConstant(R0, R1, 8); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 255); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 256); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 257); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R1, 0xfff); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R1, 0x1000); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 0x1001); // MVN+SUB. + __ AddConstant(R0, R1, 0x1002); // MOVW+ADD. + __ AddConstant(R0, R1, 0xffff); // MOVW+ADD. + __ AddConstant(R0, R1, 0x10000); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 0x10001); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 0x10002); // MVN+SUB. + __ AddConstant(R0, R1, 0x10003); // MOVW+MOVT+ADD. + __ AddConstant(R0, R1, -1); // 16-bit SUBS. + __ AddConstant(R0, R1, -7); // 16-bit SUBS. + __ AddConstant(R0, R1, -8); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -255); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -256); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -257); // 32-bit SUB, encoding T4. + __ AddConstant(R0, R1, -0xfff); // 32-bit SUB, encoding T4. + __ AddConstant(R0, R1, -0x1000); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -0x1001); // MVN+ADD. + __ AddConstant(R0, R1, -0x1002); // MOVW+SUB. + __ AddConstant(R0, R1, -0xffff); // MOVW+SUB. + __ AddConstant(R0, R1, -0x10000); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -0x10001); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -0x10002); // MVN+ADD. + __ AddConstant(R0, R1, -0x10003); // MOVW+MOVT+ADD. + + // Low registers, Rd == Rn. + __ AddConstant(R0, R0, 0); // Nothing. + __ AddConstant(R1, R1, 1); // 16-bit ADDS, encoding T2, + __ AddConstant(R0, R0, 7); // 16-bit ADDS, encoding T2. + __ AddConstant(R1, R1, 8); // 16-bit ADDS, encoding T2. + __ AddConstant(R0, R0, 255); // 16-bit ADDS, encoding T2. 
+ __ AddConstant(R1, R1, 256); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R0, 257); // 32-bit ADD, encoding T4. + __ AddConstant(R1, R1, 0xfff); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R0, 0x1000); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, 0x1001); // MVN+SUB. + __ AddConstant(R0, R0, 0x1002); // MOVW+ADD. + __ AddConstant(R1, R1, 0xffff); // MOVW+ADD. + __ AddConstant(R0, R0, 0x10000); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, 0x10001); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R0, 0x10002); // MVN+SUB. + __ AddConstant(R1, R1, 0x10003); // MOVW+MOVT+ADD. + __ AddConstant(R0, R0, -1); // 16-bit SUBS, encoding T2. + __ AddConstant(R1, R1, -7); // 16-bit SUBS, encoding T2. + __ AddConstant(R0, R0, -8); // 16-bit SUBS, encoding T2. + __ AddConstant(R1, R1, -255); // 16-bit SUBS, encoding T2. + __ AddConstant(R0, R0, -256); // 32-bit SUB, encoding T3. + __ AddConstant(R1, R1, -257); // 32-bit SUB, encoding T4. + __ AddConstant(R0, R0, -0xfff); // 32-bit SUB, encoding T4. + __ AddConstant(R1, R1, -0x1000); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R0, -0x1001); // MVN+ADD. + __ AddConstant(R1, R1, -0x1002); // MOVW+SUB. + __ AddConstant(R0, R0, -0xffff); // MOVW+SUB. + __ AddConstant(R1, R1, -0x10000); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R0, -0x10001); // 32-bit SUB, encoding T3. + __ AddConstant(R1, R1, -0x10002); // MVN+ADD. + __ AddConstant(R0, R0, -0x10003); // MOVW+MOVT+ADD. + + // High registers. + __ AddConstant(R8, R8, 0); // Nothing. + __ AddConstant(R8, R1, 1); // 32-bit ADD, encoding T3, + __ AddConstant(R0, R8, 7); // 32-bit ADD, encoding T3. + __ AddConstant(R8, R8, 8); // 32-bit ADD, encoding T3. + __ AddConstant(R8, R1, 255); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R8, 256); // 32-bit ADD, encoding T3. + __ AddConstant(R8, R8, 257); // 32-bit ADD, encoding T4. + __ AddConstant(R8, R1, 0xfff); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R8, 0x1000); // 32-bit ADD, encoding T3. 
+ __ AddConstant(R8, R8, 0x1001); // MVN+SUB. + __ AddConstant(R0, R1, 0x1002); // MOVW+ADD. + __ AddConstant(R0, R8, 0xffff); // MOVW+ADD. + __ AddConstant(R8, R8, 0x10000); // 32-bit ADD, encoding T3. + __ AddConstant(R8, R1, 0x10001); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R8, 0x10002); // MVN+SUB. + __ AddConstant(R0, R8, 0x10003); // MOVW+MOVT+ADD. + __ AddConstant(R8, R8, -1); // 32-bit ADD, encoding T3. + __ AddConstant(R8, R1, -7); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R8, -8); // 32-bit SUB, encoding T3. + __ AddConstant(R8, R8, -255); // 32-bit SUB, encoding T3. + __ AddConstant(R8, R1, -256); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R8, -257); // 32-bit SUB, encoding T4. + __ AddConstant(R8, R8, -0xfff); // 32-bit SUB, encoding T4. + __ AddConstant(R8, R1, -0x1000); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R8, -0x1001); // MVN+ADD. + __ AddConstant(R0, R1, -0x1002); // MOVW+SUB. + __ AddConstant(R8, R1, -0xffff); // MOVW+SUB. + __ AddConstant(R0, R8, -0x10000); // 32-bit SUB, encoding T3. + __ AddConstant(R8, R8, -0x10001); // 32-bit SUB, encoding T3. + __ AddConstant(R8, R1, -0x10002); // MVN+ADD. + __ AddConstant(R0, R8, -0x10003); // MOVW+MOVT+ADD. + + // Low registers, Rd != Rn, kCcKeep. + __ AddConstant(R0, R1, 0, AL, kCcKeep); // MOV. + __ AddConstant(R0, R1, 1, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 7, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 8, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 255, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 256, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 257, AL, kCcKeep); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R1, 0xfff, AL, kCcKeep); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R1, 0x1000, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 0x1001, AL, kCcKeep); // MVN+SUB.
+ __ AddConstant(R0, R1, 0x1002, AL, kCcKeep); // MOVW+ADD. + __ AddConstant(R0, R1, 0xffff, AL, kCcKeep); // MOVW+ADD. + __ AddConstant(R0, R1, 0x10000, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 0x10001, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 0x10002, AL, kCcKeep); // MVN+SUB. + __ AddConstant(R0, R1, 0x10003, AL, kCcKeep); // MOVW+MOVT+ADD. + __ AddConstant(R0, R1, -1, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, -7, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -8, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -255, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -256, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -257, AL, kCcKeep); // 32-bit SUB, encoding T4. + __ AddConstant(R0, R1, -0xfff, AL, kCcKeep); // 32-bit SUB, encoding T4. + __ AddConstant(R0, R1, -0x1000, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -0x1001, AL, kCcKeep); // MVN+ADD. + __ AddConstant(R0, R1, -0x1002, AL, kCcKeep); // MOVW+SUB. + __ AddConstant(R0, R1, -0xffff, AL, kCcKeep); // MOVW+SUB. + __ AddConstant(R0, R1, -0x10000, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -0x10001, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -0x10002, AL, kCcKeep); // MVN+ADD. + __ AddConstant(R0, R1, -0x10003, AL, kCcKeep); // MOVW+MOVT+ADD. + + // Low registers, Rd == Rn, kCcKeep. + __ AddConstant(R0, R0, 0, AL, kCcKeep); // Nothing. + __ AddConstant(R1, R1, 1, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R0, 7, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, 8, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R0, 255, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, 256, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R0, 257, AL, kCcKeep); // 32-bit ADD, encoding T4. 
+ __ AddConstant(R1, R1, 0xfff, AL, kCcKeep); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R0, 0x1000, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, 0x1001, AL, kCcKeep); // MVN+SUB. + __ AddConstant(R0, R0, 0x1002, AL, kCcKeep); // MOVW+ADD. + __ AddConstant(R1, R1, 0xffff, AL, kCcKeep); // MOVW+ADD. + __ AddConstant(R0, R0, 0x10000, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, 0x10001, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R0, 0x10002, AL, kCcKeep); // MVN+SUB. + __ AddConstant(R1, R1, 0x10003, AL, kCcKeep); // MOVW+MOVT+ADD. + __ AddConstant(R0, R0, -1, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, -7, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R0, -8, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R1, R1, -255, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R0, -256, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R1, R1, -257, AL, kCcKeep); // 32-bit SUB, encoding T4. + __ AddConstant(R0, R0, -0xfff, AL, kCcKeep); // 32-bit SUB, encoding T4. + __ AddConstant(R1, R1, -0x1000, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R0, -0x1001, AL, kCcKeep); // MVN+ADD. + __ AddConstant(R1, R1, -0x1002, AL, kCcKeep); // MOVW+SUB. + __ AddConstant(R0, R0, -0xffff, AL, kCcKeep); // MOVW+SUB. + __ AddConstant(R1, R1, -0x10000, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R0, -0x10001, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R1, R1, -0x10002, AL, kCcKeep); // MVN+ADD. + __ AddConstant(R0, R0, -0x10003, AL, kCcKeep); // MOVW+MOVT+ADD. + + // Low registers, Rd != Rn, kCcSet. + __ AddConstant(R0, R1, 0, AL, kCcSet); // 16-bit ADDS. + __ AddConstant(R0, R1, 1, AL, kCcSet); // 16-bit ADDS. + __ AddConstant(R0, R1, 7, AL, kCcSet); // 16-bit ADDS. + __ AddConstant(R0, R1, 8, AL, kCcSet); // 32-bit ADDS, encoding T3. 
+ __ AddConstant(R0, R1, 255, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R1, 256, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R1, 257, AL, kCcSet); // MVN+SUBS. + __ AddConstant(R0, R1, 0xfff, AL, kCcSet); // MOVW+ADDS. + __ AddConstant(R0, R1, 0x1000, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R1, 0x1001, AL, kCcSet); // MVN+SUBS. + __ AddConstant(R0, R1, 0x1002, AL, kCcSet); // MOVW+ADDS. + __ AddConstant(R0, R1, 0xffff, AL, kCcSet); // MOVW+ADDS. + __ AddConstant(R0, R1, 0x10000, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R1, 0x10001, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R1, 0x10002, AL, kCcSet); // MVN+SUBS. + __ AddConstant(R0, R1, 0x10003, AL, kCcSet); // MOVW+MOVT+ADDS. + __ AddConstant(R0, R1, -1, AL, kCcSet); // 16-bit SUBS. + __ AddConstant(R0, R1, -7, AL, kCcSet); // 16-bit SUBS. + __ AddConstant(R0, R1, -8, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R1, -255, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R1, -256, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R1, -257, AL, kCcSet); // MVN+ADDS. + __ AddConstant(R0, R1, -0xfff, AL, kCcSet); // MOVW+SUBS. + __ AddConstant(R0, R1, -0x1000, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R1, -0x1001, AL, kCcSet); // MVN+ADDS. + __ AddConstant(R0, R1, -0x1002, AL, kCcSet); // MOVW+SUBS. + __ AddConstant(R0, R1, -0xffff, AL, kCcSet); // MOVW+SUBS. + __ AddConstant(R0, R1, -0x10000, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R1, -0x10001, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R1, -0x10002, AL, kCcSet); // MVN+ADDS. + __ AddConstant(R0, R1, -0x10003, AL, kCcSet); // MOVW+MOVT+ADDS. + + // Low registers, Rd == Rn, kCcSet. + __ AddConstant(R0, R0, 0, AL, kCcSet); // 16-bit ADDS, encoding T2. + __ AddConstant(R1, R1, 1, AL, kCcSet); // 16-bit ADDS, encoding T2. 
+ __ AddConstant(R0, R0, 7, AL, kCcSet); // 16-bit ADDS, encoding T2. + __ AddConstant(R1, R1, 8, AL, kCcSet); // 16-bit ADDS, encoding T2. + __ AddConstant(R0, R0, 255, AL, kCcSet); // 16-bit ADDS, encoding T2. + __ AddConstant(R1, R1, 256, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R0, 257, AL, kCcSet); // MVN+SUBS. + __ AddConstant(R1, R1, 0xfff, AL, kCcSet); // MOVW+ADDS. + __ AddConstant(R0, R0, 0x1000, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R1, R1, 0x1001, AL, kCcSet); // MVN+SUBS. + __ AddConstant(R0, R0, 0x1002, AL, kCcSet); // MOVW+ADDS. + __ AddConstant(R1, R1, 0xffff, AL, kCcSet); // MOVW+ADDS. + __ AddConstant(R0, R0, 0x10000, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R1, R1, 0x10001, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R0, 0x10002, AL, kCcSet); // MVN+SUBS. + __ AddConstant(R1, R1, 0x10003, AL, kCcSet); // MOVW+MOVT+ADDS. + __ AddConstant(R0, R0, -1, AL, kCcSet); // 16-bit SUBS, encoding T2. + __ AddConstant(R1, R1, -7, AL, kCcSet); // 16-bit SUBS, encoding T2. + __ AddConstant(R0, R0, -8, AL, kCcSet); // 16-bit SUBS, encoding T2. + __ AddConstant(R1, R1, -255, AL, kCcSet); // 16-bit SUBS, encoding T2. + __ AddConstant(R0, R0, -256, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R1, R1, -257, AL, kCcSet); // MVN+ADDS. + __ AddConstant(R0, R0, -0xfff, AL, kCcSet); // MOVW+SUBS. + __ AddConstant(R1, R1, -0x1000, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R0, -0x1001, AL, kCcSet); // MVN+ADDS. + __ AddConstant(R1, R1, -0x1002, AL, kCcSet); // MOVW+SUBS. + __ AddConstant(R0, R0, -0xffff, AL, kCcSet); // MOVW+SUBS. + __ AddConstant(R1, R1, -0x10000, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R0, -0x10001, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R1, R1, -0x10002, AL, kCcSet); // MVN+ADDS. + __ AddConstant(R0, R0, -0x10003, AL, kCcSet); // MOVW+MOVT+ADDS.
+ + __ it(EQ); + __ AddConstant(R0, R1, 1, EQ, kCcSet); // 32-bit ADDS, encoding T3. + __ it(NE); + __ AddConstant(R0, R1, 1, NE, kCcKeep); // 16-bit ADDS, encoding T1. + __ it(GE); + __ AddConstant(R0, R0, 1, GE, kCcSet); // 32-bit ADDS, encoding T3. + __ it(LE); + __ AddConstant(R0, R0, 1, LE, kCcKeep); // 16-bit ADDS, encoding T2. + + EmitAndCheck(&assembler, "AddConstant"); +} + #undef __ } // namespace arm } // namespace art diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 9246c827a7..f07f8c74d7 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -439,14 +439,14 @@ const char* StoreMultipleResults[] = { nullptr }; const char* MovWMovTResults[] = { - " 0: 2400 movs r4, #0\n", - " 2: 2434 movs r4, #52 ; 0x34\n", - " 4: f240 0934 movw r9, #52 ; 0x34\n", - " 8: f241 2334 movw r3, #4660 ; 0x1234\n", - " c: f64f 79ff movw r9, #65535 ; 0xffff\n", - " 10: f2c0 0000 movt r0, #0\n", - " 14: f2c1 2034 movt r0, #4660 ; 0x1234\n", - " 18: f6cf 71ff movt r1, #65535 ; 0xffff\n", + " 0: f240 0400 movw r4, #0\n", + " 4: f240 0434 movw r4, #52 ; 0x34\n", + " 8: f240 0934 movw r9, #52 ; 0x34\n", + " c: f241 2334 movw r3, #4660 ; 0x1234\n", + " 10: f64f 79ff movw r9, #65535 ; 0xffff\n", + " 14: f2c0 0000 movt r0, #0\n", + " 18: f2c1 2034 movt r0, #4660 ; 0x1234\n", + " 1c: f6cf 71ff movt r1, #65535 ; 0xffff\n", nullptr }; const char* SpecialAddSubResults[] = { @@ -5052,6 +5052,324 @@ const char* CompareAndBranchResults[] = { nullptr }; +const char* AddConstantResults[] = { + " 0: 4608 mov r0, r1\n", + " 2: 1c48 adds r0, r1, #1\n", + " 4: 1dc8 adds r0, r1, #7\n", + " 6: f101 0008 add.w r0, r1, #8\n", + " a: f101 00ff add.w r0, r1, #255 ; 0xff\n", + " e: f501 7080 add.w r0, r1, #256 ; 0x100\n", + " 12: f201 1001 addw r0, r1, #257 ; 0x101\n", + " 16: f601 70ff addw r0, r1, #4095 ; 0xfff\n", + " 1a: f501 5080 add.w r0, r1, #4096 ; 
0x1000\n", + " 1e: f46f 5080 mvn.w r0, #4096 ; 0x1000\n", + " 22: 1a08 subs r0, r1, r0\n", + " 24: f241 0002 movw r0, #4098 ; 0x1002\n", + " 28: 1808 adds r0, r1, r0\n", + " 2a: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 2e: 1808 adds r0, r1, r0\n", + " 30: f501 3080 add.w r0, r1, #65536 ; 0x10000\n", + " 34: f101 1001 add.w r0, r1, #65537 ; 0x10001\n", + " 38: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 3c: 1a08 subs r0, r1, r0\n", + " 3e: f240 0003 movw r0, #3\n", + " 42: f2c0 0001 movt r0, #1\n", + " 46: 1808 adds r0, r1, r0\n", + " 48: 1e48 subs r0, r1, #1\n", + " 4a: 1fc8 subs r0, r1, #7\n", + " 4c: f1a1 0008 sub.w r0, r1, #8\n", + " 50: f1a1 00ff sub.w r0, r1, #255 ; 0xff\n", + " 54: f5a1 7080 sub.w r0, r1, #256 ; 0x100\n", + " 58: f2a1 1001 subw r0, r1, #257 ; 0x101\n", + " 5c: f6a1 70ff subw r0, r1, #4095 ; 0xfff\n", + " 60: f5a1 5080 sub.w r0, r1, #4096 ; 0x1000\n", + " 64: f46f 5080 mvn.w r0, #4096 ; 0x1000\n", + " 68: 1808 adds r0, r1, r0\n", + " 6a: f241 0002 movw r0, #4098 ; 0x1002\n", + " 6e: 1a08 subs r0, r1, r0\n", + " 70: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 74: 1a08 subs r0, r1, r0\n", + " 76: f5a1 3080 sub.w r0, r1, #65536 ; 0x10000\n", + " 7a: f1a1 1001 sub.w r0, r1, #65537 ; 0x10001\n", + " 7e: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 82: 1808 adds r0, r1, r0\n", + " 84: f64f 70fd movw r0, #65533 ; 0xfffd\n", + " 88: f6cf 70fe movt r0, #65534 ; 0xfffe\n", + " 8c: 1808 adds r0, r1, r0\n", + " 8e: 3101 adds r1, #1\n", + " 90: 3007 adds r0, #7\n", + " 92: 3108 adds r1, #8\n", + " 94: 30ff adds r0, #255 ; 0xff\n", + " 96: f501 7180 add.w r1, r1, #256 ; 0x100\n", + " 9a: f200 1001 addw r0, r0, #257 ; 0x101\n", + " 9e: f601 71ff addw r1, r1, #4095 ; 0xfff\n", + " a2: f500 5080 add.w r0, r0, #4096 ; 0x1000\n", + " a6: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " aa: eba1 010c sub.w r1, r1, ip\n", + " ae: f241 0c02 movw ip, #4098 ; 0x1002\n", + " b2: 4460 add r0, ip\n", + " b4: f64f 7cff movw ip, #65535 ; 0xffff\n", + " b8: 4461 add r1, ip\n", 
+ " ba: f500 3080 add.w r0, r0, #65536 ; 0x10000\n", + " be: f101 1101 add.w r1, r1, #65537 ; 0x10001\n", + " c2: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " c6: eba0 000c sub.w r0, r0, ip\n", + " ca: f240 0c03 movw ip, #3\n", + " ce: f2c0 0c01 movt ip, #1\n", + " d2: 4461 add r1, ip\n", + " d4: 3801 subs r0, #1\n", + " d6: 3907 subs r1, #7\n", + " d8: 3808 subs r0, #8\n", + " da: 39ff subs r1, #255 ; 0xff\n", + " dc: f5a0 7080 sub.w r0, r0, #256 ; 0x100\n", + " e0: f2a1 1101 subw r1, r1, #257 ; 0x101\n", + " e4: f6a0 70ff subw r0, r0, #4095 ; 0xfff\n", + " e8: f5a1 5180 sub.w r1, r1, #4096 ; 0x1000\n", + " ec: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " f0: 4460 add r0, ip\n", + " f2: f241 0c02 movw ip, #4098 ; 0x1002\n", + " f6: eba1 010c sub.w r1, r1, ip\n", + " fa: f64f 7cff movw ip, #65535 ; 0xffff\n", + " fe: eba0 000c sub.w r0, r0, ip\n", + " 102: f5a1 3180 sub.w r1, r1, #65536 ; 0x10000\n", + " 106: f1a0 1001 sub.w r0, r0, #65537 ; 0x10001\n", + " 10a: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 10e: 4461 add r1, ip\n", + " 110: f64f 7cfd movw ip, #65533 ; 0xfffd\n", + " 114: f6cf 7cfe movt ip, #65534 ; 0xfffe\n", + " 118: 4460 add r0, ip\n", + " 11a: f101 0801 add.w r8, r1, #1\n", + " 11e: f108 0007 add.w r0, r8, #7\n", + " 122: f108 0808 add.w r8, r8, #8\n", + " 126: f101 08ff add.w r8, r1, #255 ; 0xff\n", + " 12a: f508 7080 add.w r0, r8, #256 ; 0x100\n", + " 12e: f208 1801 addw r8, r8, #257 ; 0x101\n", + " 132: f601 78ff addw r8, r1, #4095 ; 0xfff\n", + " 136: f508 5080 add.w r0, r8, #4096 ; 0x1000\n", + " 13a: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 13e: eba8 080c sub.w r8, r8, ip\n", + " 142: f241 0002 movw r0, #4098 ; 0x1002\n", + " 146: 1808 adds r0, r1, r0\n", + " 148: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 14c: eb08 0000 add.w r0, r8, r0\n", + " 150: f508 3880 add.w r8, r8, #65536 ; 0x10000\n", + " 154: f101 1801 add.w r8, r1, #65537 ; 0x10001\n", + " 158: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 15c: eba8 0000 sub.w r0, r8, r0\n", + " 
160: f240 0003 movw r0, #3\n", + " 164: f2c0 0001 movt r0, #1\n", + " 168: eb08 0000 add.w r0, r8, r0\n", + " 16c: f108 38ff add.w r8, r8, #4294967295 ; 0xffffffff\n", + " 170: f1a1 0807 sub.w r8, r1, #7\n", + " 174: f1a8 0008 sub.w r0, r8, #8\n", + " 178: f1a8 08ff sub.w r8, r8, #255 ; 0xff\n", + " 17c: f5a1 7880 sub.w r8, r1, #256 ; 0x100\n", + " 180: f2a8 1001 subw r0, r8, #257 ; 0x101\n", + " 184: f6a8 78ff subw r8, r8, #4095 ; 0xfff\n", + " 188: f5a1 5880 sub.w r8, r1, #4096 ; 0x1000\n", + " 18c: f46f 5080 mvn.w r0, #4096 ; 0x1000\n", + " 190: eb08 0000 add.w r0, r8, r0\n", + " 194: f241 0002 movw r0, #4098 ; 0x1002\n", + " 198: 1a08 subs r0, r1, r0\n", + " 19a: f64f 78ff movw r8, #65535 ; 0xffff\n", + " 19e: eba1 0808 sub.w r8, r1, r8\n", + " 1a2: f5a8 3080 sub.w r0, r8, #65536 ; 0x10000\n", + " 1a6: f1a8 1801 sub.w r8, r8, #65537 ; 0x10001\n", + " 1aa: f06f 1801 mvn.w r8, #65537 ; 0x10001\n", + " 1ae: eb01 0808 add.w r8, r1, r8\n", + " 1b2: f64f 70fd movw r0, #65533 ; 0xfffd\n", + " 1b6: f6cf 70fe movt r0, #65534 ; 0xfffe\n", + " 1ba: eb08 0000 add.w r0, r8, r0\n", + " 1be: 4608 mov r0, r1\n", + " 1c0: f101 0001 add.w r0, r1, #1\n", + " 1c4: f101 0007 add.w r0, r1, #7\n", + " 1c8: f101 0008 add.w r0, r1, #8\n", + " 1cc: f101 00ff add.w r0, r1, #255 ; 0xff\n", + " 1d0: f501 7080 add.w r0, r1, #256 ; 0x100\n", + " 1d4: f201 1001 addw r0, r1, #257 ; 0x101\n", + " 1d8: f601 70ff addw r0, r1, #4095 ; 0xfff\n", + " 1dc: f501 5080 add.w r0, r1, #4096 ; 0x1000\n", + " 1e0: f46f 5080 mvn.w r0, #4096 ; 0x1000\n", + " 1e4: eba1 0000 sub.w r0, r1, r0\n", + " 1e8: f241 0002 movw r0, #4098 ; 0x1002\n", + " 1ec: eb01 0000 add.w r0, r1, r0\n", + " 1f0: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 1f4: eb01 0000 add.w r0, r1, r0\n", + " 1f8: f501 3080 add.w r0, r1, #65536 ; 0x10000\n", + " 1fc: f101 1001 add.w r0, r1, #65537 ; 0x10001\n", + " 200: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 204: eba1 0000 sub.w r0, r1, r0\n", + " 208: f240 0003 movw r0, #3\n", + " 20c: f2c0 
0001 movt r0, #1\n", + " 210: eb01 0000 add.w r0, r1, r0\n", + " 214: f101 30ff add.w r0, r1, #4294967295 ; 0xffffffff\n", + " 218: f1a1 0007 sub.w r0, r1, #7\n", + " 21c: f1a1 0008 sub.w r0, r1, #8\n", + " 220: f1a1 00ff sub.w r0, r1, #255 ; 0xff\n", + " 224: f5a1 7080 sub.w r0, r1, #256 ; 0x100\n", + " 228: f2a1 1001 subw r0, r1, #257 ; 0x101\n", + " 22c: f6a1 70ff subw r0, r1, #4095 ; 0xfff\n", + " 230: f5a1 5080 sub.w r0, r1, #4096 ; 0x1000\n", + " 234: f46f 5080 mvn.w r0, #4096 ; 0x1000\n", + " 238: eb01 0000 add.w r0, r1, r0\n", + " 23c: f241 0002 movw r0, #4098 ; 0x1002\n", + " 240: eba1 0000 sub.w r0, r1, r0\n", + " 244: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 248: eba1 0000 sub.w r0, r1, r0\n", + " 24c: f5a1 3080 sub.w r0, r1, #65536 ; 0x10000\n", + " 250: f1a1 1001 sub.w r0, r1, #65537 ; 0x10001\n", + " 254: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 258: eb01 0000 add.w r0, r1, r0\n", + " 25c: f64f 70fd movw r0, #65533 ; 0xfffd\n", + " 260: f6cf 70fe movt r0, #65534 ; 0xfffe\n", + " 264: eb01 0000 add.w r0, r1, r0\n", + " 268: f101 0101 add.w r1, r1, #1\n", + " 26c: f100 0007 add.w r0, r0, #7\n", + " 270: f101 0108 add.w r1, r1, #8\n", + " 274: f100 00ff add.w r0, r0, #255 ; 0xff\n", + " 278: f501 7180 add.w r1, r1, #256 ; 0x100\n", + " 27c: f200 1001 addw r0, r0, #257 ; 0x101\n", + " 280: f601 71ff addw r1, r1, #4095 ; 0xfff\n", + " 284: f500 5080 add.w r0, r0, #4096 ; 0x1000\n", + " 288: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 28c: eba1 010c sub.w r1, r1, ip\n", + " 290: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 294: 4460 add r0, ip\n", + " 296: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 29a: 4461 add r1, ip\n", + " 29c: f500 3080 add.w r0, r0, #65536 ; 0x10000\n", + " 2a0: f101 1101 add.w r1, r1, #65537 ; 0x10001\n", + " 2a4: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 2a8: eba0 000c sub.w r0, r0, ip\n", + " 2ac: f240 0c03 movw ip, #3\n", + " 2b0: f2c0 0c01 movt ip, #1\n", + " 2b4: 4461 add r1, ip\n", + " 2b6: f100 30ff add.w r0, r0, 
#4294967295 ; 0xffffffff\n", + " 2ba: f1a1 0107 sub.w r1, r1, #7\n", + " 2be: f1a0 0008 sub.w r0, r0, #8\n", + " 2c2: f1a1 01ff sub.w r1, r1, #255 ; 0xff\n", + " 2c6: f5a0 7080 sub.w r0, r0, #256 ; 0x100\n", + " 2ca: f2a1 1101 subw r1, r1, #257 ; 0x101\n", + " 2ce: f6a0 70ff subw r0, r0, #4095 ; 0xfff\n", + " 2d2: f5a1 5180 sub.w r1, r1, #4096 ; 0x1000\n", + " 2d6: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 2da: 4460 add r0, ip\n", + " 2dc: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 2e0: eba1 010c sub.w r1, r1, ip\n", + " 2e4: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 2e8: eba0 000c sub.w r0, r0, ip\n", + " 2ec: f5a1 3180 sub.w r1, r1, #65536 ; 0x10000\n", + " 2f0: f1a0 1001 sub.w r0, r0, #65537 ; 0x10001\n", + " 2f4: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 2f8: 4461 add r1, ip\n", + " 2fa: f64f 7cfd movw ip, #65533 ; 0xfffd\n", + " 2fe: f6cf 7cfe movt ip, #65534 ; 0xfffe\n", + " 302: 4460 add r0, ip\n", + " 304: 1c08 adds r0, r1, #0\n", + " 306: 1c48 adds r0, r1, #1\n", + " 308: 1dc8 adds r0, r1, #7\n", + " 30a: f111 0008 adds.w r0, r1, #8\n", + " 30e: f111 00ff adds.w r0, r1, #255 ; 0xff\n", + " 312: f511 7080 adds.w r0, r1, #256 ; 0x100\n", + " 316: f46f 7080 mvn.w r0, #256 ; 0x100\n", + " 31a: 1a08 subs r0, r1, r0\n", + " 31c: f640 70ff movw r0, #4095 ; 0xfff\n", + " 320: 1808 adds r0, r1, r0\n", + " 322: f511 5080 adds.w r0, r1, #4096 ; 0x1000\n", + " 326: f46f 5080 mvn.w r0, #4096 ; 0x1000\n", + " 32a: 1a08 subs r0, r1, r0\n", + " 32c: f241 0002 movw r0, #4098 ; 0x1002\n", + " 330: 1808 adds r0, r1, r0\n", + " 332: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 336: 1808 adds r0, r1, r0\n", + " 338: f511 3080 adds.w r0, r1, #65536 ; 0x10000\n", + " 33c: f111 1001 adds.w r0, r1, #65537 ; 0x10001\n", + " 340: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 344: 1a08 subs r0, r1, r0\n", + " 346: f240 0003 movw r0, #3\n", + " 34a: f2c0 0001 movt r0, #1\n", + " 34e: 1808 adds r0, r1, r0\n", + " 350: 1e48 subs r0, r1, #1\n", + " 352: 1fc8 subs r0, r1, #7\n", + " 354: 
f1b1 0008 subs.w r0, r1, #8\n", + " 358: f1b1 00ff subs.w r0, r1, #255 ; 0xff\n", + " 35c: f5b1 7080 subs.w r0, r1, #256 ; 0x100\n", + " 360: f46f 7080 mvn.w r0, #256 ; 0x100\n", + " 364: 1808 adds r0, r1, r0\n", + " 366: f640 70ff movw r0, #4095 ; 0xfff\n", + " 36a: 1a08 subs r0, r1, r0\n", + " 36c: f5b1 5080 subs.w r0, r1, #4096 ; 0x1000\n", + " 370: f46f 5080 mvn.w r0, #4096 ; 0x1000\n", + " 374: 1808 adds r0, r1, r0\n", + " 376: f241 0002 movw r0, #4098 ; 0x1002\n", + " 37a: 1a08 subs r0, r1, r0\n", + " 37c: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 380: 1a08 subs r0, r1, r0\n", + " 382: f5b1 3080 subs.w r0, r1, #65536 ; 0x10000\n", + " 386: f1b1 1001 subs.w r0, r1, #65537 ; 0x10001\n", + " 38a: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 38e: 1808 adds r0, r1, r0\n", + " 390: f64f 70fd movw r0, #65533 ; 0xfffd\n", + " 394: f6cf 70fe movt r0, #65534 ; 0xfffe\n", + " 398: 1808 adds r0, r1, r0\n", + " 39a: 3000 adds r0, #0\n", + " 39c: 3101 adds r1, #1\n", + " 39e: 3007 adds r0, #7\n", + " 3a0: 3108 adds r1, #8\n", + " 3a2: 30ff adds r0, #255 ; 0xff\n", + " 3a4: f511 7180 adds.w r1, r1, #256 ; 0x100\n", + " 3a8: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " 3ac: ebb0 000c subs.w r0, r0, ip\n", + " 3b0: f640 7cff movw ip, #4095 ; 0xfff\n", + " 3b4: eb11 010c adds.w r1, r1, ip\n", + " 3b8: f510 5080 adds.w r0, r0, #4096 ; 0x1000\n", + " 3bc: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 3c0: ebb1 010c subs.w r1, r1, ip\n", + " 3c4: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 3c8: eb10 000c adds.w r0, r0, ip\n", + " 3cc: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 3d0: eb11 010c adds.w r1, r1, ip\n", + " 3d4: f510 3080 adds.w r0, r0, #65536 ; 0x10000\n", + " 3d8: f111 1101 adds.w r1, r1, #65537 ; 0x10001\n", + " 3dc: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 3e0: ebb0 000c subs.w r0, r0, ip\n", + " 3e4: f240 0c03 movw ip, #3\n", + " 3e8: f2c0 0c01 movt ip, #1\n", + " 3ec: eb11 010c adds.w r1, r1, ip\n", + " 3f0: 3801 subs r0, #1\n", + " 3f2: 3907 subs r1, #7\n", + " 3f4: 
3808 subs r0, #8\n", + " 3f6: 39ff subs r1, #255 ; 0xff\n", + " 3f8: f5b0 7080 subs.w r0, r0, #256 ; 0x100\n", + " 3fc: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " 400: eb11 010c adds.w r1, r1, ip\n", + " 404: f640 7cff movw ip, #4095 ; 0xfff\n", + " 408: ebb0 000c subs.w r0, r0, ip\n", + " 40c: f5b1 5180 subs.w r1, r1, #4096 ; 0x1000\n", + " 410: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 414: eb10 000c adds.w r0, r0, ip\n", + " 418: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 41c: ebb1 010c subs.w r1, r1, ip\n", + " 420: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 424: ebb0 000c subs.w r0, r0, ip\n", + " 428: f5b1 3180 subs.w r1, r1, #65536 ; 0x10000\n", + " 42c: f1b0 1001 subs.w r0, r0, #65537 ; 0x10001\n", + " 430: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 434: eb11 010c adds.w r1, r1, ip\n", + " 438: f64f 7cfd movw ip, #65533 ; 0xfffd\n", + " 43c: f6cf 7cfe movt ip, #65534 ; 0xfffe\n", + " 440: eb10 000c adds.w r0, r0, ip\n", + " 444: bf08 it eq\n", + " 446: f111 0001 addseq.w r0, r1, #1\n", + " 44a: bf18 it ne\n", + " 44c: 1c48 addne r0, r1, #1\n", + " 44e: bfa8 it ge\n", + " 450: f110 0001 addsge.w r0, r0, #1\n", + " 454: bfd8 it le\n", + " 456: 3001 addle r0, #1\n", + nullptr +}; + std::map<std::string, const char* const*> test_results; void setup_results() { test_results["SimpleMov"] = SimpleMovResults; @@ -5102,4 +5420,5 @@ void setup_results() { test_results["LoadStoreLiteral"] = LoadStoreLiteralResults; test_results["LoadStoreLimits"] = LoadStoreLimitsResults; test_results["CompareAndBranch"] = CompareAndBranchResults; + test_results["AddConstant"] = AddConstantResults; } diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index fc7ac7061a..afca8adcbb 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -249,6 +249,11 @@ void MipsAssembler::MulR6(Register rd, Register rs, Register rt) { EmitR(0, rs, rt, rd, 2, 0x18); } +void MipsAssembler::MuhR6(Register rd, Register rs, 
Register rt) { + CHECK(IsR6()); + EmitR(0, rs, rt, rd, 3, 0x18); +} + void MipsAssembler::MuhuR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); EmitR(0, rs, rt, rd, 3, 0x19); @@ -302,6 +307,46 @@ void MipsAssembler::Nor(Register rd, Register rs, Register rt) { EmitR(0, rs, rt, rd, 0, 0x27); } +void MipsAssembler::Movz(Register rd, Register rs, Register rt) { + CHECK(!IsR6()); + EmitR(0, rs, rt, rd, 0, 0x0A); +} + +void MipsAssembler::Movn(Register rd, Register rs, Register rt) { + CHECK(!IsR6()); + EmitR(0, rs, rt, rd, 0, 0x0B); +} + +void MipsAssembler::Seleqz(Register rd, Register rs, Register rt) { + CHECK(IsR6()); + EmitR(0, rs, rt, rd, 0, 0x35); +} + +void MipsAssembler::Selnez(Register rd, Register rs, Register rt) { + CHECK(IsR6()); + EmitR(0, rs, rt, rd, 0, 0x37); +} + +void MipsAssembler::ClzR6(Register rd, Register rs) { + CHECK(IsR6()); + EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x10); +} + +void MipsAssembler::ClzR2(Register rd, Register rs) { + CHECK(!IsR6()); + EmitR(0x1C, rs, rd, rd, 0, 0x20); +} + +void MipsAssembler::CloR6(Register rd, Register rs) { + CHECK(IsR6()); + EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x11); +} + +void MipsAssembler::CloR2(Register rd, Register rs) { + CHECK(!IsR6()); + EmitR(0x1C, rs, rd, rd, 0, 0x21); +} + void MipsAssembler::Seb(Register rd, Register rt) { EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20); } @@ -314,6 +359,11 @@ void MipsAssembler::Wsbh(Register rd, Register rt) { EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20); } +void MipsAssembler::Bitswap(Register rd, Register rt) { + CHECK(IsR6()); + EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x0, 0x20); +} + void MipsAssembler::Sll(Register rd, Register rt, int shamt) { CHECK(IsUint<5>(shamt)) << shamt; EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00); @@ -342,6 +392,10 @@ void MipsAssembler::Srlv(Register rd, Register rt, Register rs) { EmitR(0, rs, rt, rd, 0, 0x06); } +void MipsAssembler::Rotrv(Register rd, 
Register rt, Register rs) { + EmitR(0, rs, rt, rd, 1, 0x06); +} + void MipsAssembler::Srav(Register rd, Register rt, Register rs) { EmitR(0, rs, rt, rd, 0, 0x07); } @@ -449,6 +503,18 @@ void MipsAssembler::Bgtz(Register rt, uint16_t imm16) { EmitI(0x7, rt, static_cast<Register>(0), imm16); } +void MipsAssembler::Bc1f(int cc, uint16_t imm16) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>(cc << 2), imm16); +} + +void MipsAssembler::Bc1t(int cc, uint16_t imm16) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>((cc << 2) | 1), imm16); +} + void MipsAssembler::J(uint32_t addr26) { EmitI26(0x2, addr26); } @@ -583,7 +649,17 @@ void MipsAssembler::Bnezc(Register rs, uint32_t imm21) { EmitI21(0x3E, rs, imm21); } -void MipsAssembler::EmitBcond(BranchCondition cond, Register rs, Register rt, uint16_t imm16) { +void MipsAssembler::Bc1eqz(FRegister ft, uint16_t imm16) { + CHECK(IsR6()); + EmitFI(0x11, 0x9, ft, imm16); +} + +void MipsAssembler::Bc1nez(FRegister ft, uint16_t imm16) { + CHECK(IsR6()); + EmitFI(0x11, 0xD, ft, imm16); +} + +void MipsAssembler::EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16) { switch (cond) { case kCondLTZ: CHECK_EQ(rt, ZERO); @@ -615,6 +691,14 @@ void MipsAssembler::EmitBcond(BranchCondition cond, Register rs, Register rt, ui CHECK_EQ(rt, ZERO); Bnez(rs, imm16); break; + case kCondF: + CHECK_EQ(rt, ZERO); + Bc1f(static_cast<int>(rs), imm16); + break; + case kCondT: + CHECK_EQ(rt, ZERO); + Bc1t(static_cast<int>(rs), imm16); + break; case kCondLT: case kCondGE: case kCondLE: @@ -629,7 +713,7 @@ void MipsAssembler::EmitBcond(BranchCondition cond, Register rs, Register rt, ui } } -void MipsAssembler::EmitBcondc(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21) { +void MipsAssembler::EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21) { switch (cond) { 
case kCondLT: Bltc(rs, rt, imm16_21); @@ -679,6 +763,14 @@ void MipsAssembler::EmitBcondc(BranchCondition cond, Register rs, Register rt, u case kCondGEU: Bgeuc(rs, rt, imm16_21); break; + case kCondF: + CHECK_EQ(rt, ZERO); + Bc1eqz(static_cast<FRegister>(rs), imm16_21); + break; + case kCondT: + CHECK_EQ(rt, ZERO); + Bc1nez(static_cast<FRegister>(rs), imm16_21); + break; case kUncond: LOG(FATAL) << "Unexpected branch condition " << cond; UNREACHABLE(); @@ -733,6 +825,202 @@ void MipsAssembler::NegD(FRegister fd, FRegister fs) { EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7); } +void MipsAssembler::CunS(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x31); +} + +void MipsAssembler::CeqS(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x32); +} + +void MipsAssembler::CueqS(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x33); +} + +void MipsAssembler::ColtS(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x34); +} + +void MipsAssembler::CultS(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x35); +} + +void MipsAssembler::ColeS(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x36); +} + +void MipsAssembler::CuleS(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x37); +} + +void MipsAssembler::CunD(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + 
CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x31); +} + +void MipsAssembler::CeqD(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x32); +} + +void MipsAssembler::CueqD(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x33); +} + +void MipsAssembler::ColtD(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x34); +} + +void MipsAssembler::CultD(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x35); +} + +void MipsAssembler::ColeD(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x36); +} + +void MipsAssembler::CuleD(int cc, FRegister fs, FRegister ft) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x37); +} + +void MipsAssembler::CmpUnS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x14, ft, fs, fd, 0x01); +} + +void MipsAssembler::CmpEqS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x14, ft, fs, fd, 0x02); +} + +void MipsAssembler::CmpUeqS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x14, ft, fs, fd, 0x03); +} + +void MipsAssembler::CmpLtS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x14, ft, fs, fd, 0x04); +} + +void MipsAssembler::CmpUltS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x14, ft, fs, fd, 0x05); +} + +void MipsAssembler::CmpLeS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 
0x14, ft, fs, fd, 0x06); +} + +void MipsAssembler::CmpUleS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x14, ft, fs, fd, 0x07); +} + +void MipsAssembler::CmpOrS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x14, ft, fs, fd, 0x11); +} + +void MipsAssembler::CmpUneS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x14, ft, fs, fd, 0x12); +} + +void MipsAssembler::CmpNeS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x14, ft, fs, fd, 0x13); +} + +void MipsAssembler::CmpUnD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x15, ft, fs, fd, 0x01); +} + +void MipsAssembler::CmpEqD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x15, ft, fs, fd, 0x02); +} + +void MipsAssembler::CmpUeqD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x15, ft, fs, fd, 0x03); +} + +void MipsAssembler::CmpLtD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x15, ft, fs, fd, 0x04); +} + +void MipsAssembler::CmpUltD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x15, ft, fs, fd, 0x05); +} + +void MipsAssembler::CmpLeD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x15, ft, fs, fd, 0x06); +} + +void MipsAssembler::CmpUleD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x15, ft, fs, fd, 0x07); +} + +void MipsAssembler::CmpOrD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x15, ft, fs, fd, 0x11); +} + +void MipsAssembler::CmpUneD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x15, ft, fs, fd, 0x12); +} + +void MipsAssembler::CmpNeD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x15, ft, fs, fd, 0x13); +} + +void MipsAssembler::Movf(Register rd, Register rs, int cc) { + CHECK(!IsR6()); + 
CHECK(IsUint<3>(cc)) << cc; + EmitR(0, rs, static_cast<Register>(cc << 2), rd, 0, 0x01); +} + +void MipsAssembler::Movt(Register rd, Register rs, int cc) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01); +} + void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) { EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20); } @@ -1004,6 +1292,10 @@ MipsAssembler::Branch::Branch(bool is_r6, CHECK_NE(lhs_reg, ZERO); CHECK_EQ(rhs_reg, ZERO); break; + case kCondF: + case kCondT: + CHECK_EQ(rhs_reg, ZERO); + break; case kUncond: UNREACHABLE(); } @@ -1058,6 +1350,10 @@ MipsAssembler::BranchCondition MipsAssembler::Branch::OppositeCondition( return kCondGEU; case kCondGEU: return kCondLTU; + case kCondF: + return kCondT; + case kCondT: + return kCondF; case kUncond: LOG(FATAL) << "Unexpected branch condition " << cond; } @@ -1460,7 +1756,7 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { break; case Branch::kCondBranch: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - EmitBcond(condition, lhs, rhs, offset); + EmitBcondR2(condition, lhs, rhs, offset); Nop(); // TODO: improve by filling the delay slot. break; case Branch::kCall: @@ -1507,7 +1803,7 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { // Note: the opposite condition branch encodes 8 as the distance, which is equal to the // number of instructions skipped: // (PUSH(IncreaseFrameSize(ADDIU) + SW) + NAL + LUI + ORI + ADDU + LW + JR). 
- EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, 8); + EmitBcondR2(Branch::OppositeCondition(condition), lhs, rhs, 8); Push(RA); Nal(); CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); @@ -1535,8 +1831,8 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { break; case Branch::kR6CondBranch: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - EmitBcondc(condition, lhs, rhs, offset); - Nop(); // TODO: improve by filling the forbidden slot. + EmitBcondR6(condition, lhs, rhs, offset); + Nop(); // TODO: improve by filling the forbidden/delay slot. break; case Branch::kR6Call: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); @@ -1552,7 +1848,7 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Jic(AT, Low16Bits(offset)); break; case Branch::kR6LongCondBranch: - EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2); + EmitBcondR6(Branch::OppositeCondition(condition), lhs, rhs, 2); offset += (offset & 0x8000) << 1; // Account for sign extension in jic. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Auipc(AT, High16Bits(offset)); @@ -1654,6 +1950,24 @@ void MipsAssembler::Bgeu(Register rs, Register rt, MipsLabel* label) { } } +void MipsAssembler::Bc1f(int cc, MipsLabel* label) { + CHECK(IsUint<3>(cc)) << cc; + Bcond(label, kCondF, static_cast<Register>(cc), ZERO); +} + +void MipsAssembler::Bc1t(int cc, MipsLabel* label) { + CHECK(IsUint<3>(cc)) << cc; + Bcond(label, kCondT, static_cast<Register>(cc), ZERO); +} + +void MipsAssembler::Bc1eqz(FRegister ft, MipsLabel* label) { + Bcond(label, kCondF, static_cast<Register>(ft), ZERO); +} + +void MipsAssembler::Bc1nez(FRegister ft, MipsLabel* label) { + Bcond(label, kCondT, static_cast<Register>(ft), ZERO); +} + void MipsAssembler::LoadFromOffset(LoadOperandType type, Register reg, Register base, int32_t offset) { // IsInt<16> must be passed a signed value. 
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 1ef0992dac..f569aa858c 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -72,8 +72,8 @@ class MipsExceptionSlowPath { : scratch_(scratch), stack_adjust_(stack_adjust) {} MipsExceptionSlowPath(MipsExceptionSlowPath&& src) - : scratch_(std::move(src.scratch_)), - stack_adjust_(std::move(src.stack_adjust_)), + : scratch_(src.scratch_), + stack_adjust_(src.stack_adjust_), exception_entry_(std::move(src.exception_entry_)) {} private: @@ -119,6 +119,7 @@ class MipsAssembler FINAL : public Assembler { void DivuR2(Register rd, Register rs, Register rt); // R2 void ModuR2(Register rd, Register rs, Register rt); // R2 void MulR6(Register rd, Register rs, Register rt); // R6 + void MuhR6(Register rd, Register rs, Register rt); // R6 void MuhuR6(Register rd, Register rs, Register rt); // R6 void DivR6(Register rd, Register rs, Register rt); // R6 void ModR6(Register rd, Register rs, Register rt); // R6 @@ -133,9 +134,19 @@ class MipsAssembler FINAL : public Assembler { void Xori(Register rt, Register rs, uint16_t imm16); void Nor(Register rd, Register rs, Register rt); + void Movz(Register rd, Register rs, Register rt); // R2 + void Movn(Register rd, Register rs, Register rt); // R2 + void Seleqz(Register rd, Register rs, Register rt); // R6 + void Selnez(Register rd, Register rs, Register rt); // R6 + void ClzR6(Register rd, Register rs); + void ClzR2(Register rd, Register rs); + void CloR6(Register rd, Register rs); + void CloR2(Register rd, Register rs); + void Seb(Register rd, Register rt); // R2+ void Seh(Register rd, Register rt); // R2+ void Wsbh(Register rd, Register rt); // R2+ + void Bitswap(Register rd, Register rt); // R6 void Sll(Register rd, Register rt, int shamt); void Srl(Register rd, Register rt, int shamt); @@ -143,6 +154,7 @@ class MipsAssembler FINAL : public Assembler { void Sra(Register rd, Register rt, int shamt); 
void Sllv(Register rd, Register rt, Register rs); void Srlv(Register rd, Register rt, Register rs); + void Rotrv(Register rd, Register rt, Register rs); // R2+ void Srav(Register rd, Register rt, Register rs); void Lb(Register rt, Register rs, uint16_t imm16); @@ -173,6 +185,8 @@ class MipsAssembler FINAL : public Assembler { void Bgez(Register rt, uint16_t imm16); void Blez(Register rt, uint16_t imm16); void Bgtz(Register rt, uint16_t imm16); + void Bc1f(int cc, uint16_t imm16); // R2 + void Bc1t(int cc, uint16_t imm16); // R2 void J(uint32_t addr26); void Jal(uint32_t addr26); void Jalr(Register rd, Register rs); @@ -196,6 +210,8 @@ class MipsAssembler FINAL : public Assembler { void Bnec(Register rs, Register rt, uint16_t imm16); // R6 void Beqzc(Register rs, uint32_t imm21); // R6 void Bnezc(Register rs, uint32_t imm21); // R6 + void Bc1eqz(FRegister ft, uint16_t imm16); // R6 + void Bc1nez(FRegister ft, uint16_t imm16); // R6 void AddS(FRegister fd, FRegister fs, FRegister ft); void SubS(FRegister fd, FRegister fs, FRegister ft); @@ -210,6 +226,43 @@ class MipsAssembler FINAL : public Assembler { void NegS(FRegister fd, FRegister fs); void NegD(FRegister fd, FRegister fs); + void CunS(int cc, FRegister fs, FRegister ft); // R2 + void CeqS(int cc, FRegister fs, FRegister ft); // R2 + void CueqS(int cc, FRegister fs, FRegister ft); // R2 + void ColtS(int cc, FRegister fs, FRegister ft); // R2 + void CultS(int cc, FRegister fs, FRegister ft); // R2 + void ColeS(int cc, FRegister fs, FRegister ft); // R2 + void CuleS(int cc, FRegister fs, FRegister ft); // R2 + void CunD(int cc, FRegister fs, FRegister ft); // R2 + void CeqD(int cc, FRegister fs, FRegister ft); // R2 + void CueqD(int cc, FRegister fs, FRegister ft); // R2 + void ColtD(int cc, FRegister fs, FRegister ft); // R2 + void CultD(int cc, FRegister fs, FRegister ft); // R2 + void ColeD(int cc, FRegister fs, FRegister ft); // R2 + void CuleD(int cc, FRegister fs, FRegister ft); // R2 + void 
CmpUnS(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpEqS(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpUeqS(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpLtS(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpUltS(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpLeS(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpUleS(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpOrS(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpUneS(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpNeS(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpUnD(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpEqD(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpUeqD(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpLtD(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpUltD(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpLeD(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpUleD(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpOrD(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpUneD(FRegister fd, FRegister fs, FRegister ft); // R6 + void CmpNeD(FRegister fd, FRegister fs, FRegister ft); // R6 + void Movf(Register rd, Register rs, int cc); // R2 + void Movt(Register rd, Register rs, int cc); // R2 + void Cvtsw(FRegister fd, FRegister fs); void Cvtdw(FRegister fd, FRegister fs); void Cvtsd(FRegister fd, FRegister fs); @@ -255,6 +308,10 @@ class MipsAssembler FINAL : public Assembler { void Bge(Register rs, Register rt, MipsLabel* label); void Bltu(Register rs, Register rt, MipsLabel* label); void Bgeu(Register rs, Register rt, MipsLabel* label); + void Bc1f(int cc, MipsLabel* label); // R2 + void Bc1t(int cc, MipsLabel* label); // R2 + void Bc1eqz(FRegister ft, MipsLabel* label); // R6 + void Bc1nez(FRegister ft, MipsLabel* label); // R6 void EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset, 
size_t size); void LoadFromOffset(LoadOperandType type, Register reg, Register base, int32_t offset); @@ -284,7 +341,8 @@ class MipsAssembler FINAL : public Assembler { // // Emit code that will create an activation on the stack. - void BuildFrame(size_t frame_size, ManagedRegister method_reg, + void BuildFrame(size_t frame_size, + ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; @@ -302,58 +360,85 @@ class MipsAssembler FINAL : public Assembler { void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE; - void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, ManagedRegister mscratch) - OVERRIDE; + void StoreImmediateToThread32(ThreadOffset<kMipsWordSize> dest, + uint32_t imm, + ManagedRegister mscratch) OVERRIDE; - void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs, + void StoreStackOffsetToThread32(ThreadOffset<kMipsWordSize> thr_offs, + FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; - void StoreStackPointerToThread32(ThreadOffset<4> thr_offs) OVERRIDE; + void StoreStackPointerToThread32(ThreadOffset<kMipsWordSize> thr_offs) OVERRIDE; - void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off, + void StoreSpanning(FrameOffset dest, + ManagedRegister msrc, + FrameOffset in_off, ManagedRegister mscratch) OVERRIDE; // Load routines. 
void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE; - void LoadFromThread32(ManagedRegister mdest, ThreadOffset<4> src, size_t size) OVERRIDE; + void LoadFromThread32(ManagedRegister mdest, + ThreadOffset<kMipsWordSize> src, + size_t size) OVERRIDE; void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; - void LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs, + void LoadRef(ManagedRegister mdest, + ManagedRegister base, + MemberOffset offs, bool unpoison_reference) OVERRIDE; void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE; - void LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset<4> offs) OVERRIDE; + void LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset<kMipsWordSize> offs) OVERRIDE; // Copying routines. void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE; - void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs, + void CopyRawPtrFromThread32(FrameOffset fr_offs, + ThreadOffset<kMipsWordSize> thr_offs, ManagedRegister mscratch) OVERRIDE; - void CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs, + void CopyRawPtrToThread32(ThreadOffset<kMipsWordSize> thr_offs, + FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE; void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE; - void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister mscratch, + void Copy(FrameOffset dest, + ManagedRegister src_base, + Offset src_offset, + ManagedRegister mscratch, size_t size) OVERRIDE; - void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, - ManagedRegister mscratch, size_t size) OVERRIDE; + void Copy(ManagedRegister dest_base, + Offset dest_offset, + FrameOffset src, + ManagedRegister mscratch, + size_t size) OVERRIDE; - void Copy(FrameOffset dest, 
FrameOffset src_base, Offset src_offset, ManagedRegister mscratch, + void Copy(FrameOffset dest, + FrameOffset src_base, + Offset src_offset, + ManagedRegister mscratch, size_t size) OVERRIDE; - void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, - ManagedRegister mscratch, size_t size) OVERRIDE; + void Copy(ManagedRegister dest, + Offset dest_offset, + ManagedRegister src, + Offset src_offset, + ManagedRegister mscratch, + size_t size) OVERRIDE; - void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, - ManagedRegister mscratch, size_t size) OVERRIDE; + void Copy(FrameOffset dest, + Offset dest_offset, + FrameOffset src, + Offset src_offset, + ManagedRegister mscratch, + size_t size) OVERRIDE; void MemoryBarrier(ManagedRegister) OVERRIDE; @@ -371,13 +456,17 @@ class MipsAssembler FINAL : public Assembler { // value is null and null_allowed. in_reg holds a possibly stale reference // that can be used to avoid loading the handle scope entry to see if the value is // null. - void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, - ManagedRegister in_reg, bool null_allowed) OVERRIDE; + void CreateHandleScopeEntry(ManagedRegister out_reg, + FrameOffset handlescope_offset, + ManagedRegister in_reg, + bool null_allowed) OVERRIDE; // Set up out_off to hold a Object** into the handle scope, or to be null if the // value is null and null_allowed. - void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, - ManagedRegister mscratch, bool null_allowed) OVERRIDE; + void CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handlescope_offset, + ManagedRegister mscratch, + bool null_allowed) OVERRIDE; // src holds a handle scope entry (Object**) load this into dst. 
void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; @@ -390,7 +479,7 @@ class MipsAssembler FINAL : public Assembler { // Call to address held at [base+offset]. void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE; void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE; - void CallFromThread32(ThreadOffset<4> offset, ManagedRegister mscratch) OVERRIDE; + void CallFromThread32(ThreadOffset<kMipsWordSize> offset, ManagedRegister mscratch) OVERRIDE; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. @@ -425,6 +514,8 @@ class MipsAssembler FINAL : public Assembler { kCondNEZ, kCondLTU, kCondGEU, + kCondF, // Floating-point predicate false. + kCondT, // Floating-point predicate true. kUncond, }; friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs); @@ -531,7 +622,22 @@ class MipsAssembler FINAL : public Assembler { // // Composite branches (made of several instructions) with longer reach have 32-bit // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first). - // The composite branches cover the range of PC + +/-2GB. + // The composite branches cover the range of PC + +/-2GB on MIPS32 CPUs. However, + // the range is not end-to-end on MIPS64 (unless addresses are forced to zero- or + // sign-extend from 32 to 64 bits by the appropriate CPU configuration). + // Consider the following implementation of a long unconditional branch, for + // example: + // + // auipc at, offset_31_16 // at = pc + sign_extend(offset_31_16) << 16 + // jic at, offset_15_0 // pc = at + sign_extend(offset_15_0) + // + // Both of the above instructions take 16-bit signed offsets as immediate operands. + // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000 + // due to sign extension. This must be compensated for by incrementing offset_31_16 + // by 1. 
offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is + // 0x7FFF, adding 1 will overflow the positive offset into the negative range. + // Therefore, the long branch range is something like from PC - 0x80000000 to + // PC + 0x7FFF7FFF, IOW, shorter by 32KB on one side. // // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special // case with the addiu instruction and a 16 bit offset. @@ -568,17 +674,17 @@ class MipsAssembler FINAL : public Assembler { // Helper for the above. void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); - uint32_t old_location_; // Offset into assembler buffer in bytes. - uint32_t location_; // Offset into assembler buffer in bytes. - uint32_t target_; // Offset into assembler buffer in bytes. + uint32_t old_location_; // Offset into assembler buffer in bytes. + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. - uint32_t lhs_reg_ : 5; // Left-hand side register in conditional branches or - // indirect call register. - uint32_t rhs_reg_ : 5; // Right-hand side register in conditional branches. - BranchCondition condition_ : 5; // Condition for conditional branches. + uint32_t lhs_reg_; // Left-hand side register in conditional branches or + // indirect call register. + uint32_t rhs_reg_; // Right-hand side register in conditional branches. + BranchCondition condition_; // Condition for conditional branches. - Type type_ : 5; // Current type of the branch. - Type old_type_ : 5; // Initial type of the branch. + Type type_; // Current type of the branch. + Type old_type_; // Initial type of the branch. 
}; friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs); friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs); @@ -589,8 +695,8 @@ class MipsAssembler FINAL : public Assembler { void EmitI26(int opcode, uint32_t imm26); void EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct); void EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm); - void EmitBcond(BranchCondition cond, Register rs, Register rt, uint16_t imm16); - void EmitBcondc(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21); // R6 + void EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16); + void EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21); void Buncond(MipsLabel* label); void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO); diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index 063d8bd825..6f8b3e8c57 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -21,6 +21,8 @@ #include "base/stl_util.h" #include "utils/assembler_test.h" +#define __ GetAssembler()-> + namespace art { struct MIPSCpuRegisterCompare { @@ -184,6 +186,63 @@ class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler, return result; } + void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register, + mips::MipsLabel*), + std::string instr_name) { + mips::MipsLabel label; + (Base::GetAssembler()->*f)(mips::A0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + (Base::GetAssembler()->*f)(mips::A1, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, 1f\n" 
+ "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a1, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + + void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register, + mips::Register, + mips::MipsLabel*), + std::string instr_name) { + mips::MipsLabel label; + (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, $a1, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a2, $a3, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + private: std::vector<mips::Register*> registers_; std::map<mips::Register, std::string, MIPSCpuRegisterCompare> secondary_register_names_; @@ -196,8 +255,6 @@ TEST_F(AssemblerMIPSTest, Toolchain) { EXPECT_TRUE(CheckTools()); } -#define __ GetAssembler()-> - TEST_F(AssemblerMIPSTest, Addu) { DriverStr(RepeatRRR(&mips::MipsAssembler::Addu, "addu ${reg1}, ${reg2}, ${reg3}"), "Addu"); } @@ -418,6 +475,84 @@ TEST_F(AssemblerMIPSTest, NegD) { DriverStr(RepeatFF(&mips::MipsAssembler::NegD, "neg.d ${reg1}, ${reg2}"), "NegD"); } +TEST_F(AssemblerMIPSTest, CunS) { + DriverStr(RepeatIbFF(&mips::MipsAssembler::CunS, 3, "c.un.s $fcc{imm}, ${reg1}, ${reg2}"), + "CunS"); +} + +TEST_F(AssemblerMIPSTest, CeqS) { + DriverStr(RepeatIbFF(&mips::MipsAssembler::CeqS, 3, "c.eq.s $fcc{imm}, ${reg1}, ${reg2}"), + "CeqS"); +} + +TEST_F(AssemblerMIPSTest, CueqS) { + 
DriverStr(RepeatIbFF(&mips::MipsAssembler::CueqS, 3, "c.ueq.s $fcc{imm}, ${reg1}, ${reg2}"), + "CueqS"); +} + +TEST_F(AssemblerMIPSTest, ColtS) { + DriverStr(RepeatIbFF(&mips::MipsAssembler::ColtS, 3, "c.olt.s $fcc{imm}, ${reg1}, ${reg2}"), + "ColtS"); +} + +TEST_F(AssemblerMIPSTest, CultS) { + DriverStr(RepeatIbFF(&mips::MipsAssembler::CultS, 3, "c.ult.s $fcc{imm}, ${reg1}, ${reg2}"), + "CultS"); +} + +TEST_F(AssemblerMIPSTest, ColeS) { + DriverStr(RepeatIbFF(&mips::MipsAssembler::ColeS, 3, "c.ole.s $fcc{imm}, ${reg1}, ${reg2}"), + "ColeS"); +} + +TEST_F(AssemblerMIPSTest, CuleS) { + DriverStr(RepeatIbFF(&mips::MipsAssembler::CuleS, 3, "c.ule.s $fcc{imm}, ${reg1}, ${reg2}"), + "CuleS"); +} + +TEST_F(AssemblerMIPSTest, CunD) { + DriverStr(RepeatIbFF(&mips::MipsAssembler::CunD, 3, "c.un.d $fcc{imm}, ${reg1}, ${reg2}"), + "CunD"); +} + +TEST_F(AssemblerMIPSTest, CeqD) { + DriverStr(RepeatIbFF(&mips::MipsAssembler::CeqD, 3, "c.eq.d $fcc{imm}, ${reg1}, ${reg2}"), + "CeqD"); +} + +TEST_F(AssemblerMIPSTest, CueqD) { + DriverStr(RepeatIbFF(&mips::MipsAssembler::CueqD, 3, "c.ueq.d $fcc{imm}, ${reg1}, ${reg2}"), + "CueqD"); +} + +TEST_F(AssemblerMIPSTest, ColtD) { + DriverStr(RepeatIbFF(&mips::MipsAssembler::ColtD, 3, "c.olt.d $fcc{imm}, ${reg1}, ${reg2}"), + "ColtD"); +} + +TEST_F(AssemblerMIPSTest, CultD) { + DriverStr(RepeatIbFF(&mips::MipsAssembler::CultD, 3, "c.ult.d $fcc{imm}, ${reg1}, ${reg2}"), + "CultD"); +} + +TEST_F(AssemblerMIPSTest, ColeD) { + DriverStr(RepeatIbFF(&mips::MipsAssembler::ColeD, 3, "c.ole.d $fcc{imm}, ${reg1}, ${reg2}"), + "ColeD"); +} + +TEST_F(AssemblerMIPSTest, CuleD) { + DriverStr(RepeatIbFF(&mips::MipsAssembler::CuleD, 3, "c.ule.d $fcc{imm}, ${reg1}, ${reg2}"), + "CuleD"); +} + +TEST_F(AssemblerMIPSTest, Movf) { + DriverStr(RepeatRRIb(&mips::MipsAssembler::Movf, 3, "movf ${reg1}, ${reg2}, $fcc{imm}"), "Movf"); +} + +TEST_F(AssemblerMIPSTest, Movt) { + DriverStr(RepeatRRIb(&mips::MipsAssembler::Movt, 3, "movt ${reg1}, ${reg2}, $fcc{imm}"), 
"Movt"); +} + TEST_F(AssemblerMIPSTest, CvtSW) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "CvtSW"); } @@ -1000,55 +1135,11 @@ TEST_F(AssemblerMIPSTest, B) { } TEST_F(AssemblerMIPSTest, Beq) { - mips::MipsLabel label; - __ Beq(mips::A0, mips::A1, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Beq(mips::A2, mips::A3, &label); - - std::string expected = - ".set noreorder\n" - "beq $a0, $a1, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "beq $a2, $a3, 1b\n" - "nop\n"; - DriverStr(expected, "Beq"); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq"); } TEST_F(AssemblerMIPSTest, Bne) { - mips::MipsLabel label; - __ Bne(mips::A0, mips::A1, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bne(mips::A2, mips::A3, &label); - - std::string expected = - ".set noreorder\n" - "bne $a0, $a1, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "bne $a2, $a3, 1b\n" - "nop\n"; - DriverStr(expected, "Bne"); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne"); } TEST_F(AssemblerMIPSTest, Beqz) { @@ -1104,60 +1195,24 @@ TEST_F(AssemblerMIPSTest, Bnez) { } TEST_F(AssemblerMIPSTest, Bltz) { - mips::MipsLabel label; - __ Bltz(mips::A0, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips::ZERO, 
mips::ZERO, mips::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bltz(mips::A1, &label); - - std::string expected = - ".set noreorder\n" - "bltz $a0, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "bltz $a1, 1b\n" - "nop\n"; - DriverStr(expected, "Bltz"); + BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz"); } TEST_F(AssemblerMIPSTest, Bgez) { - mips::MipsLabel label; - __ Bgez(mips::A0, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bgez(mips::A1, &label); - - std::string expected = - ".set noreorder\n" - "bgez $a0, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "bgez $a1, 1b\n" - "nop\n"; - DriverStr(expected, "Bgez"); + BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez"); } TEST_F(AssemblerMIPSTest, Blez) { + BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez"); +} + +TEST_F(AssemblerMIPSTest, Bgtz) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz"); +} + +TEST_F(AssemblerMIPSTest, Blt) { mips::MipsLabel label; - __ Blez(mips::A0, &label); + __ Blt(mips::A0, mips::A1, &label); constexpr size_t kAdduCount1 = 63; for (size_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); @@ -1167,23 +1222,25 @@ TEST_F(AssemblerMIPSTest, Blez) { for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); } - __ Blez(mips::A1, &label); + __ Blt(mips::A2, mips::A3, &label); std::string expected = ".set noreorder\n" - "blez $a0, 
1f\n" + "slt $at, $a0, $a1\n" + "bne $zero, $at, 1f\n" "nop\n" + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + "1:\n" + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "blez $a1, 1b\n" + "slt $at, $a2, $a3\n" + "bne $zero, $at, 1b\n" "nop\n"; - DriverStr(expected, "Blez"); + DriverStr(expected, "Blt"); } -TEST_F(AssemblerMIPSTest, Bgtz) { +TEST_F(AssemblerMIPSTest, Bge) { mips::MipsLabel label; - __ Bgtz(mips::A0, &label); + __ Bge(mips::A0, mips::A1, &label); constexpr size_t kAdduCount1 = 63; for (size_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); @@ -1193,23 +1250,25 @@ TEST_F(AssemblerMIPSTest, Bgtz) { for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); } - __ Bgtz(mips::A1, &label); + __ Bge(mips::A2, mips::A3, &label); std::string expected = ".set noreorder\n" - "bgtz $a0, 1f\n" + "slt $at, $a0, $a1\n" + "beq $zero, $at, 1f\n" "nop\n" + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + "1:\n" + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "bgtz $a1, 1b\n" + "slt $at, $a2, $a3\n" + "beq $zero, $at, 1b\n" "nop\n"; - DriverStr(expected, "Bgtz"); + DriverStr(expected, "Bge"); } -TEST_F(AssemblerMIPSTest, Blt) { +TEST_F(AssemblerMIPSTest, Bltu) { mips::MipsLabel label; - __ Blt(mips::A0, mips::A1, &label); + __ Bltu(mips::A0, mips::A1, &label); constexpr size_t kAdduCount1 = 63; for (size_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); @@ -1219,25 +1278,25 @@ TEST_F(AssemblerMIPSTest, Blt) { for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); } - __ Blt(mips::A2, mips::A3, &label); + __ Bltu(mips::A2, mips::A3, &label); std::string expected = ".set noreorder\n" - "slt $at, $a0, $a1\n" + "sltu $at, $a0, $a1\n" "bne $zero, $at, 1f\n" "nop\n" + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + "1:\n" + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "slt $at, $a2, $a3\n" + 
"sltu $at, $a2, $a3\n" "bne $zero, $at, 1b\n" "nop\n"; - DriverStr(expected, "Blt"); + DriverStr(expected, "Bltu"); } -TEST_F(AssemblerMIPSTest, Bge) { +TEST_F(AssemblerMIPSTest, Bgeu) { mips::MipsLabel label; - __ Bge(mips::A0, mips::A1, &label); + __ Bgeu(mips::A0, mips::A1, &label); constexpr size_t kAdduCount1 = 63; for (size_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); @@ -1247,25 +1306,25 @@ TEST_F(AssemblerMIPSTest, Bge) { for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); } - __ Bge(mips::A2, mips::A3, &label); + __ Bgeu(mips::A2, mips::A3, &label); std::string expected = ".set noreorder\n" - "slt $at, $a0, $a1\n" + "sltu $at, $a0, $a1\n" "beq $zero, $at, 1f\n" "nop\n" + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + "1:\n" + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "slt $at, $a2, $a3\n" + "sltu $at, $a2, $a3\n" "beq $zero, $at, 1b\n" "nop\n"; - DriverStr(expected, "Bge"); + DriverStr(expected, "Bgeu"); } -TEST_F(AssemblerMIPSTest, Bltu) { +TEST_F(AssemblerMIPSTest, Bc1f) { mips::MipsLabel label; - __ Bltu(mips::A0, mips::A1, &label); + __ Bc1f(0, &label); constexpr size_t kAdduCount1 = 63; for (size_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); @@ -1275,25 +1334,23 @@ TEST_F(AssemblerMIPSTest, Bltu) { for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); } - __ Bltu(mips::A2, mips::A3, &label); + __ Bc1f(7, &label); std::string expected = ".set noreorder\n" - "sltu $at, $a0, $a1\n" - "bne $zero, $at, 1f\n" + "bc1f $fcc0, 1f\n" "nop\n" + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + "1:\n" + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "sltu $at, $a2, $a3\n" - "bne $zero, $at, 1b\n" + "bc1f $fcc7, 1b\n" "nop\n"; - DriverStr(expected, "Bltu"); + DriverStr(expected, "Bc1f"); } -TEST_F(AssemblerMIPSTest, Bgeu) { +TEST_F(AssemblerMIPSTest, Bc1t) { mips::MipsLabel label; - __ 
Bgeu(mips::A0, mips::A1, &label); + __ Bc1t(0, &label); constexpr size_t kAdduCount1 = 63; for (size_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); @@ -1303,20 +1360,18 @@ TEST_F(AssemblerMIPSTest, Bgeu) { for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); } - __ Bgeu(mips::A2, mips::A3, &label); + __ Bc1t(7, &label); std::string expected = ".set noreorder\n" - "sltu $at, $a0, $a1\n" - "beq $zero, $at, 1f\n" + "bc1t $fcc0, 1f\n" "nop\n" + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + "1:\n" + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "sltu $at, $a2, $a3\n" - "beq $zero, $at, 1b\n" + "bc1t $fcc7, 1b\n" "nop\n"; - DriverStr(expected, "Bgeu"); + DriverStr(expected, "Bc1t"); } #undef __ diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index ba2525e555..cfd8421e93 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -19,15 +19,73 @@ #include "base/bit_utils.h" #include "base/casts.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" #include "memory_region.h" #include "thread.h" namespace art { namespace mips64 { +void Mips64Assembler::FinalizeCode() { + for (auto& exception_block : exception_blocks_) { + EmitExceptionPoll(&exception_block); + } + PromoteBranches(); +} + +void Mips64Assembler::FinalizeInstructions(const MemoryRegion& region) { + EmitBranches(); + Assembler::FinalizeInstructions(region); + PatchCFI(); +} + +void Mips64Assembler::PatchCFI() { + if (cfi().NumberOfDelayedAdvancePCs() == 0u) { + return; + } + + typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; + const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); + const std::vector<uint8_t>& old_stream = data.first; + const std::vector<DelayedAdvancePC>& advances = data.second; + + // Refill our data buffer with 
patched opcodes. + cfi().ReserveCFIStream(old_stream.size() + advances.size() + 16); + size_t stream_pos = 0; + for (const DelayedAdvancePC& advance : advances) { + DCHECK_GE(advance.stream_pos, stream_pos); + // Copy old data up to the point where advance was issued. + cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos); + stream_pos = advance.stream_pos; + // Insert the advance command with its final offset. + size_t final_pc = GetAdjustedPosition(advance.pc); + cfi().AdvancePC(final_pc); + } + // Copy the final segment if any. + cfi().AppendRawData(old_stream, stream_pos, old_stream.size()); +} + +void Mips64Assembler::EmitBranches() { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (branch placeholders) in the buffer. + overwriting_ = true; + for (auto& branch : branches_) { + EmitBranch(&branch); + } + overwriting_ = false; +} + void Mips64Assembler::Emit(uint32_t value) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - buffer_.Emit<uint32_t>(value); + if (overwriting_) { + // Branches to labels are emitted into their placeholders here. + buffer_.Store<uint32_t>(overwrite_location_, value); + overwrite_location_ += sizeof(uint32_t); + } else { + // Other instructions are simply appended at the end here. 
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_); + buffer_.Emit<uint32_t>(value); + } } void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, @@ -82,15 +140,16 @@ void Mips64Assembler::EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t void Mips64Assembler::EmitI21(int opcode, GpuRegister rs, uint32_t imm21) { CHECK_NE(rs, kNoGpuRegister); + CHECK(IsUint<21>(imm21)) << imm21; uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | static_cast<uint32_t>(rs) << kRsShift | - (imm21 & 0x1FFFFF); + imm21; Emit(encoding); } -void Mips64Assembler::EmitJ(int opcode, uint32_t addr26) { - uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | - (addr26 & 0x3FFFFFF); +void Mips64Assembler::EmitI26(int opcode, uint32_t imm26) { + CHECK(IsUint<26>(imm26)) << imm26; + uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26; Emit(encoding); } @@ -428,26 +487,6 @@ void Mips64Assembler::Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0xb, rs, rt, imm16); } -void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) { - EmitI(0x4, rs, rt, imm16); - Nop(); -} - -void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) { - EmitI(0x5, rs, rt, imm16); - Nop(); -} - -void Mips64Assembler::J(uint32_t addr26) { - EmitJ(0x2, addr26); - Nop(); -} - -void Mips64Assembler::Jal(uint32_t addr26) { - EmitJ(0x3, addr26); - Nop(); -} - void Mips64Assembler::Seleqz(GpuRegister rd, GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, rd, 0, 0x35); } @@ -474,7 +513,6 @@ void Mips64Assembler::Dclo(GpuRegister rd, GpuRegister rs) { void Mips64Assembler::Jalr(GpuRegister rd, GpuRegister rs) { EmitR(0, rs, static_cast<GpuRegister>(0), rd, 0, 0x09); - Nop(); } void Mips64Assembler::Jalr(GpuRegister rs) { @@ -489,6 +527,15 @@ void Mips64Assembler::Auipc(GpuRegister rs, uint16_t imm16) { EmitI(0x3B, rs, static_cast<GpuRegister>(0x1E), imm16); } +void 
Mips64Assembler::Addiupc(GpuRegister rs, uint32_t imm19) { + CHECK(IsUint<19>(imm19)) << imm19; + EmitI21(0x3B, rs, imm19); +} + +void Mips64Assembler::Bc(uint32_t imm26) { + EmitI26(0x32, imm26); +} + void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) { EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16); } @@ -549,14 +596,14 @@ void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x8, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16); + EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16); } void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x18, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16); + EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16); } void Mips64Assembler::Beqzc(GpuRegister rs, uint32_t imm21) { @@ -569,6 +616,81 @@ void Mips64Assembler::Bnezc(GpuRegister rs, uint32_t imm21) { EmitI21(0x3E, rs, imm21); } +void Mips64Assembler::Bc1eqz(FpuRegister ft, uint16_t imm16) { + EmitFI(0x11, 0x9, ft, imm16); +} + +void Mips64Assembler::Bc1nez(FpuRegister ft, uint16_t imm16) { + EmitFI(0x11, 0xD, ft, imm16); +} + +void Mips64Assembler::EmitBcondc(BranchCondition cond, + GpuRegister rs, + GpuRegister rt, + uint32_t imm16_21) { + switch (cond) { + case kCondLT: + Bltc(rs, rt, imm16_21); + break; + case kCondGE: + Bgec(rs, rt, imm16_21); + break; + case kCondLE: + Bgec(rt, rs, imm16_21); + break; + case kCondGT: + Bltc(rt, rs, imm16_21); + break; + case kCondLTZ: + CHECK_EQ(rt, ZERO); + Bltzc(rs, imm16_21); + break; + case kCondGEZ: + CHECK_EQ(rt, ZERO); + Bgezc(rs, imm16_21); + break; + case kCondLEZ: + CHECK_EQ(rt, ZERO); + Blezc(rs, imm16_21); + break; + case kCondGTZ: + CHECK_EQ(rt, ZERO); + Bgtzc(rs, imm16_21); + break; + case kCondEQ: + Beqc(rs, rt, imm16_21); + break; + case kCondNE: + Bnec(rs, rt, imm16_21); + break; + case kCondEQZ: + CHECK_EQ(rt, ZERO); + 
Beqzc(rs, imm16_21); + break; + case kCondNEZ: + CHECK_EQ(rt, ZERO); + Bnezc(rs, imm16_21); + break; + case kCondLTU: + Bltuc(rs, rt, imm16_21); + break; + case kCondGEU: + Bgeuc(rs, rt, imm16_21); + break; + case kCondF: + CHECK_EQ(rt, ZERO); + Bc1eqz(static_cast<FpuRegister>(rs), imm16_21); + break; + case kCondT: + CHECK_EQ(rt, ZERO); + Bc1nez(static_cast<FpuRegister>(rs), imm16_21); + break; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << cond; + UNREACHABLE(); + } +} + void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { EmitFR(0x11, 0x10, ft, fs, fd, 0x0); } @@ -721,6 +843,86 @@ void Mips64Assembler::MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { EmitFR(0x11, 0x11, ft, fs, fd, 0x1e); } +void Mips64Assembler::CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x01); +} + +void Mips64Assembler::CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x02); +} + +void Mips64Assembler::CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x03); +} + +void Mips64Assembler::CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x04); +} + +void Mips64Assembler::CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x05); +} + +void Mips64Assembler::CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x06); +} + +void Mips64Assembler::CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x07); +} + +void Mips64Assembler::CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x11); +} + +void Mips64Assembler::CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x12); +} + +void Mips64Assembler::CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x13); 
+} + +void Mips64Assembler::CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x01); +} + +void Mips64Assembler::CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x02); +} + +void Mips64Assembler::CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x03); +} + +void Mips64Assembler::CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x04); +} + +void Mips64Assembler::CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x05); +} + +void Mips64Assembler::CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x06); +} + +void Mips64Assembler::CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x07); +} + +void Mips64Assembler::CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x11); +} + +void Mips64Assembler::CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x12); +} + +void Mips64Assembler::CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x13); +} + void Mips64Assembler::Cvtsw(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x14, static_cast<FpuRegister>(0), fs, fd, 0x20); } @@ -925,15 +1127,6 @@ void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) { } } -void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp) { - if (IsInt<16>(value)) { - Addiu(rt, rs, value); - } else { - LoadConst32(rtmp, value); - Addu(rt, rs, rtmp); - } -} - void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) { if (IsInt<16>(value)) { Daddiu(rt, rs, value); @@ -943,177 +1136,637 @@ void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, Gp } } -// -// MIPS64R6 branches -// -// -// 
Unconditional (pc + 32-bit signed offset): -// -// auipc at, ofs_high -// jic at, ofs_low -// // no delay/forbidden slot -// -// -// Conditional (pc + 32-bit signed offset): -// -// b<cond>c reg, +2 // skip next 2 instructions -// auipc at, ofs_high -// jic at, ofs_low -// // no delay/forbidden slot -// -// -// Unconditional (pc + 32-bit signed offset) and link: -// -// auipc reg, ofs_high -// daddiu reg, ofs_low -// jialc reg, 0 -// // no delay/forbidden slot -// -// -// TODO: use shorter instruction sequences whenever possible. -// - -void Mips64Assembler::Bind(Label* label) { - CHECK(!label->IsBound()); - int32_t bound_pc = buffer_.Size(); +void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBits offset_size, + Mips64Assembler::Branch::Type short_type, + Mips64Assembler::Branch::Type long_type) { + type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type; +} - // Walk the list of the branches (auipc + jic pairs) referring to and preceding this label. - // Embed the previously unknown pc-relative addresses in them. - while (label->IsLinked()) { - int32_t position = label->Position(); - // Extract the branch (instruction pair) - uint32_t auipc = buffer_.Load<uint32_t>(position); - uint32_t jic = buffer_.Load<uint32_t>(position + 4); // actually, jic or daddiu +void Mips64Assembler::Branch::InitializeType(bool is_call) { + OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); + if (is_call) { + InitShortOrLong(offset_size, kCall, kLongCall); + } else if (condition_ == kUncond) { + InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + } else { + if (condition_ == kCondEQZ || condition_ == kCondNEZ) { + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size <= kOffset23) ? 
kCondBranch : kLongCondBranch; + } else { + InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + } + } + old_type_ = type_; +} - // Extract the location of the previous pair in the list (walking the list backwards; - // the previous pair location was stored in the immediate operands of the instructions) - int32_t prev = (auipc << 16) | (jic & 0xFFFF); +bool Mips64Assembler::Branch::IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs) { + switch (condition) { + case kCondLT: + case kCondGT: + case kCondNE: + case kCondLTU: + return lhs == rhs; + default: + return false; + } +} - // Get the pc-relative address - uint32_t offset = bound_pc - position; - offset += (offset & 0x8000) << 1; // account for sign extension in jic/daddiu +bool Mips64Assembler::Branch::IsUncond(BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs) { + switch (condition) { + case kUncond: + return true; + case kCondGE: + case kCondLE: + case kCondEQ: + case kCondGEU: + return lhs == rhs; + default: + return false; + } +} - // Embed it in the two instructions - auipc = (auipc & 0xFFFF0000) | (offset >> 16); - jic = (jic & 0xFFFF0000) | (offset & 0xFFFF); +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(ZERO), + rhs_reg_(ZERO), + condition_(kUncond) { + InitializeType(false); +} + +Mips64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + Mips64Assembler::BranchCondition condition, + GpuRegister lhs_reg, + GpuRegister rhs_reg) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(lhs_reg), + rhs_reg_(rhs_reg), + condition_(condition) { + CHECK_NE(condition, kUncond); + switch (condition) { + case kCondEQ: + case kCondNE: + case kCondLT: + case kCondGE: + case kCondLE: + case kCondGT: + case kCondLTU: + case kCondGEU: + CHECK_NE(lhs_reg, ZERO); + CHECK_NE(rhs_reg, ZERO); + break; + case kCondLTZ: + case kCondGEZ: + case 
kCondLEZ: + case kCondGTZ: + case kCondEQZ: + case kCondNEZ: + CHECK_NE(lhs_reg, ZERO); + CHECK_EQ(rhs_reg, ZERO); + break; + case kCondF: + case kCondT: + CHECK_EQ(rhs_reg, ZERO); + break; + case kUncond: + UNREACHABLE(); + } + CHECK(!IsNop(condition, lhs_reg, rhs_reg)); + if (IsUncond(condition, lhs_reg, rhs_reg)) { + // Branch condition is always true, make the branch unconditional. + condition_ = kUncond; + } + InitializeType(false); +} + +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(indirect_reg), + rhs_reg_(ZERO), + condition_(kUncond) { + CHECK_NE(indirect_reg, ZERO); + CHECK_NE(indirect_reg, AT); + InitializeType(true); +} + +Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition( + Mips64Assembler::BranchCondition cond) { + switch (cond) { + case kCondLT: + return kCondGE; + case kCondGE: + return kCondLT; + case kCondLE: + return kCondGT; + case kCondGT: + return kCondLE; + case kCondLTZ: + return kCondGEZ; + case kCondGEZ: + return kCondLTZ; + case kCondLEZ: + return kCondGTZ; + case kCondGTZ: + return kCondLEZ; + case kCondEQ: + return kCondNE; + case kCondNE: + return kCondEQ; + case kCondEQZ: + return kCondNEZ; + case kCondNEZ: + return kCondEQZ; + case kCondLTU: + return kCondGEU; + case kCondGEU: + return kCondLTU; + case kCondF: + return kCondT; + case kCondT: + return kCondF; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << cond; + } + UNREACHABLE(); +} - // Save the adjusted instructions - buffer_.Store<uint32_t>(position, auipc); - buffer_.Store<uint32_t>(position + 4, jic); +Mips64Assembler::Branch::Type Mips64Assembler::Branch::GetType() const { + return type_; +} + +Mips64Assembler::BranchCondition Mips64Assembler::Branch::GetCondition() const { + return condition_; +} + +GpuRegister Mips64Assembler::Branch::GetLeftRegister() const { + return lhs_reg_; +} + +GpuRegister 
Mips64Assembler::Branch::GetRightRegister() const { + return rhs_reg_; +} + +uint32_t Mips64Assembler::Branch::GetTarget() const { + return target_; +} + +uint32_t Mips64Assembler::Branch::GetLocation() const { + return location_; +} + +uint32_t Mips64Assembler::Branch::GetOldLocation() const { + return old_location_; +} + +uint32_t Mips64Assembler::Branch::GetLength() const { + return branch_info_[type_].length; +} + +uint32_t Mips64Assembler::Branch::GetOldLength() const { + return branch_info_[old_type_].length; +} + +uint32_t Mips64Assembler::Branch::GetSize() const { + return GetLength() * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetOldSize() const { + return GetOldLength() * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetEndLocation() const { + return GetLocation() + GetSize(); +} + +uint32_t Mips64Assembler::Branch::GetOldEndLocation() const { + return GetOldLocation() + GetOldSize(); +} + +bool Mips64Assembler::Branch::IsLong() const { + switch (type_) { + // Short branches. + case kUncondBranch: + case kCondBranch: + case kCall: + return false; + // Long branches. + case kLongUncondBranch: + case kLongCondBranch: + case kLongCall: + return true; + } + UNREACHABLE(); +} + +bool Mips64Assembler::Branch::IsResolved() const { + return target_ != kUnresolved; +} + +Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSize() const { + OffsetBits offset_size = + (type_ == kCondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ)) + ? kOffset23 + : branch_info_[type_].offset_size; + return offset_size; +} + +Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSizeNeeded(uint32_t location, + uint32_t target) { + // For unresolved targets assume the shortest encoding + // (later it will be made longer if needed). 
+ if (target == kUnresolved) + return kOffset16; + int64_t distance = static_cast<int64_t>(target) - location; + // To simplify calculations in composite branches consisting of multiple instructions + // bump up the distance by a value larger than the max byte size of a composite branch. + distance += (distance >= 0) ? kMaxBranchSize : -kMaxBranchSize; + if (IsInt<kOffset16>(distance)) + return kOffset16; + else if (IsInt<kOffset18>(distance)) + return kOffset18; + else if (IsInt<kOffset21>(distance)) + return kOffset21; + else if (IsInt<kOffset23>(distance)) + return kOffset23; + else if (IsInt<kOffset28>(distance)) + return kOffset28; + return kOffset32; +} + +void Mips64Assembler::Branch::Resolve(uint32_t target) { + target_ = target; +} + +void Mips64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) { + if (location_ > expand_location) { + location_ += delta; + } + if (!IsResolved()) { + return; // Don't know the target yet. + } + if (target_ > expand_location) { + target_ += delta; + } +} + +void Mips64Assembler::Branch::PromoteToLong() { + switch (type_) { + // Short branches. + case kUncondBranch: + type_ = kLongUncondBranch; + break; + case kCondBranch: + type_ = kLongCondBranch; + break; + case kCall: + type_ = kLongCall; + break; + default: + // Note: 'type_' is already long. + break; + } + CHECK(IsLong()); +} + +uint32_t Mips64Assembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { + // If the branch is still unresolved or already long, nothing to do. + if (IsLong() || !IsResolved()) { + return 0; + } + // Promote the short branch to long if the offset size is too small + // to hold the distance between location_ and target_. + if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) { + PromoteToLong(); + uint32_t old_size = GetOldSize(); + uint32_t new_size = GetSize(); + CHECK_GT(new_size, old_size); + return new_size - old_size; + } + // The following logic is for debugging/testing purposes. 
+ // Promote some short branches to long when it's not really required. + if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) { + int64_t distance = static_cast<int64_t>(target_) - location_; + distance = (distance >= 0) ? distance : -distance; + if (distance >= max_short_distance) { + PromoteToLong(); + uint32_t old_size = GetOldSize(); + uint32_t new_size = GetSize(); + CHECK_GT(new_size, old_size); + return new_size - old_size; + } + } + return 0; +} + +uint32_t Mips64Assembler::Branch::GetOffsetLocation() const { + return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetOffset() const { + CHECK(IsResolved()); + uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize()); + // Calculate the byte distance between instructions and also account for + // different PC-relative origins. + uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t); + // Prepare the offset for encoding into the instruction(s). + offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift; + return offset; +} + +Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +const Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) const { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +void Mips64Assembler::Bind(Mips64Label* label) { + CHECK(!label->IsBound()); + uint32_t bound_pc = buffer_.Size(); + + // Walk the list of branches referring to and preceding this label. + // Store the previously unknown target addresses in them. 
+ while (label->IsLinked()) { + uint32_t branch_id = label->Position(); + Branch* branch = GetBranch(branch_id); + branch->Resolve(bound_pc); + + uint32_t branch_location = branch->GetLocation(); + // Extract the location of the previous branch in the list (walking the list backwards; + // the previous branch ID was stored in the space reserved for this branch). + uint32_t prev = buffer_.Load<uint32_t>(branch_location); // On to the previous branch in the list... label->position_ = prev; } - // Now make the label object contain its own location - // (it will be used by the branches referring to and following this label) + // Now make the label object contain its own location (relative to the end of the preceding + // branch, if any; it will be used by the branches referring to and following this label). + label->prev_branch_id_plus_one_ = branches_.size(); + if (label->prev_branch_id_plus_one_) { + uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; + const Branch* branch = GetBranch(branch_id); + bound_pc -= branch->GetEndLocation(); + } label->BindTo(bound_pc); } -void Mips64Assembler::B(Label* label) { - if (label->IsBound()) { - // Branch backwards (to a preceding label), distance is known - uint32_t offset = label->Position() - buffer_.Size(); - CHECK_LE(static_cast<int32_t>(offset), 0); - offset += (offset & 0x8000) << 1; // account for sign extension in jic - Auipc(AT, offset >> 16); - Jic(AT, offset); - } else { - // Branch forward (to a following label), distance is unknown - int32_t position = buffer_.Size(); - // The first branch forward will have 0 in its pc-relative address (copied from label's - // position). It will be the terminator of the list of forward-reaching branches. 
- uint32_t prev = label->position_; - Auipc(AT, prev >> 16); - Jic(AT, prev); - // Now make the link object point to the location of this branch - // (this forms a linked list of branches preceding this label) - label->LinkTo(position); +uint32_t Mips64Assembler::GetLabelLocation(Mips64Label* label) const { + CHECK(label->IsBound()); + uint32_t target = label->Position(); + if (label->prev_branch_id_plus_one_) { + // Get label location based on the branch preceding it. + uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; + const Branch* branch = GetBranch(branch_id); + target += branch->GetEndLocation(); + } + return target; +} + +uint32_t Mips64Assembler::GetAdjustedPosition(uint32_t old_position) { + // We can reconstruct the adjustment by going through all the branches from the beginning + // up to the old_position. Since we expect AdjustedPosition() to be called in a loop + // with increasing old_position, we can use the data from last AdjustedPosition() to + // continue where we left off and the whole loop should be O(m+n) where m is the number + // of positions to adjust and n is the number of branches. + if (old_position < last_old_position_) { + last_position_adjustment_ = 0; + last_old_position_ = 0; + last_branch_id_ = 0; + } + while (last_branch_id_ != branches_.size()) { + const Branch* branch = GetBranch(last_branch_id_); + if (branch->GetLocation() >= old_position + last_position_adjustment_) { + break; + } + last_position_adjustment_ += branch->GetSize() - branch->GetOldSize(); + ++last_branch_id_; + } + last_old_position_ = old_position; + return old_position + last_position_adjustment_; +} + +void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) { + uint32_t length = branches_.back().GetLength(); + if (!label->IsBound()) { + // Branch forward (to a following label), distance is unknown. + // The first branch forward will contain 0, serving as the terminator of + // the list of forward-reaching branches. 
+ Emit(label->position_); + length--; + // Now make the label object point to this branch + // (this forms a linked list of branches preceding this label). + uint32_t branch_id = branches_.size() - 1; + label->LinkTo(branch_id); + } + // Reserve space for the branch. + while (length--) { + Nop(); } } -void Mips64Assembler::Jalr(Label* label, GpuRegister indirect_reg) { - if (label->IsBound()) { - // Branch backwards (to a preceding label), distance is known - uint32_t offset = label->Position() - buffer_.Size(); - CHECK_LE(static_cast<int32_t>(offset), 0); - offset += (offset & 0x8000) << 1; // account for sign extension in daddiu - Auipc(indirect_reg, offset >> 16); - Daddiu(indirect_reg, indirect_reg, offset); - Jialc(indirect_reg, 0); - } else { - // Branch forward (to a following label), distance is unknown - int32_t position = buffer_.Size(); - // The first branch forward will have 0 in its pc-relative address (copied from label's - // position). It will be the terminator of the list of forward-reaching branches. - uint32_t prev = label->position_; - Auipc(indirect_reg, prev >> 16); - Daddiu(indirect_reg, indirect_reg, prev); - Jialc(indirect_reg, 0); - // Now make the link object point to the location of this branch - // (this forms a linked list of branches preceding this label) - label->LinkTo(position); +void Mips64Assembler::Buncond(Mips64Label* label) { + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::Bcond(Mips64Label* label, + BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs) { + // If lhs = rhs, this can be a NOP. + if (Branch::IsNop(condition, lhs, rhs)) { + return; + } + uint32_t target = label->IsBound() ? 
GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::Call(Mips64Label* label, GpuRegister indirect_reg) { + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, indirect_reg); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::PromoteBranches() { + // Promote short branches to long as necessary. + bool changed; + do { + changed = false; + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + uint32_t delta = branch.PromoteIfNeeded(); + // If this branch has been promoted and needs to expand in size, + // relocate all branches by the expansion size. + if (delta) { + changed = true; + uint32_t expand_location = branch.GetLocation(); + for (auto& branch2 : branches_) { + branch2.Relocate(expand_location, delta); + } + } + } + } while (changed); + + // Account for branch expansion by resizing the code buffer + // and moving the code in it to its final location. + size_t branch_count = branches_.size(); + if (branch_count > 0) { + // Resize. + Branch& last_branch = branches_[branch_count - 1]; + uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation(); + uint32_t old_size = buffer_.Size(); + buffer_.Resize(old_size + size_delta); + // Move the code residing between branch placeholders. + uint32_t end = old_size; + for (size_t i = branch_count; i > 0; ) { + Branch& branch = branches_[--i]; + uint32_t size = end - branch.GetOldEndLocation(); + buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size); + end = branch.GetOldLocation(); + } + } +} + +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. +const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[] = { + // Short branches. 
+ { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kUncondBranch + { 2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kCondBranch + // Exception: kOffset23 for beqzc/bnezc + { 2, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kCall + // Long branches. + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongUncondBranch + { 3, 1, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCondBranch + { 3, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCall +}; + +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. +void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { + CHECK(overwriting_); + overwrite_location_ = branch->GetLocation(); + uint32_t offset = branch->GetOffset(); + BranchCondition condition = branch->GetCondition(); + GpuRegister lhs = branch->GetLeftRegister(); + GpuRegister rhs = branch->GetRightRegister(); + switch (branch->GetType()) { + // Short branches. + case Branch::kUncondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Bc(offset); + break; + case Branch::kCondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcondc(condition, lhs, rhs, offset); + Nop(); // TODO: improve by filling the forbidden/delay slot. + break; + case Branch::kCall: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Addiupc(lhs, offset); + Jialc(lhs, 0); + break; + + // Long branches. + case Branch::kLongUncondBranch: + offset += (offset & 0x8000) << 1; // Account for sign extension in jic. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jic(AT, Low16Bits(offset)); + break; + case Branch::kLongCondBranch: + EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2); + offset += (offset & 0x8000) << 1; // Account for sign extension in jic. 
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jic(AT, Low16Bits(offset)); + break; + case Branch::kLongCall: + offset += (offset & 0x8000) << 1; // Account for sign extension in daddiu. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(lhs, High16Bits(offset)); + Daddiu(lhs, lhs, Low16Bits(offset)); + Jialc(lhs, 0); + break; } + CHECK_EQ(overwrite_location_, branch->GetEndLocation()); + CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize)); +} + +void Mips64Assembler::Bc(Mips64Label* label) { + Buncond(label); +} + +void Mips64Assembler::Jialc(Mips64Label* label, GpuRegister indirect_reg) { + Call(label, indirect_reg); +} + +void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLT, rs, rt); +} + +void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLTZ, rt); } -void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Label* label) { - Bgec(rs, rt, 2); - B(label); +void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGTZ, rt); } -void Mips64Assembler::Bltzc(GpuRegister rt, Label* label) { - Bgezc(rt, 2); - B(label); +void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGE, rs, rt); } -void Mips64Assembler::Bgtzc(GpuRegister rt, Label* label) { - Blezc(rt, 2); - B(label); +void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGEZ, rt); } -void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Label* label) { - Bltc(rs, rt, 2); - B(label); +void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLEZ, rt); } -void Mips64Assembler::Bgezc(GpuRegister rt, Label* label) { - Bltzc(rt, 2); - B(label); +void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLTU, rs, rt); } -void Mips64Assembler::Blezc(GpuRegister 
rt, Label* label) { - Bgtzc(rt, 2); - B(label); +void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGEU, rs, rt); } -void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Label* label) { - Bgeuc(rs, rt, 2); - B(label); +void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondEQ, rs, rt); } -void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Label* label) { - Bltuc(rs, rt, 2); - B(label); +void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondNE, rs, rt); } -void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Label* label) { - Bnec(rs, rt, 2); - B(label); +void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label) { + Bcond(label, kCondEQZ, rs); } -void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Label* label) { - Beqc(rs, rt, 2); - B(label); +void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) { + Bcond(label, kCondNEZ, rs); } -void Mips64Assembler::Beqzc(GpuRegister rs, Label* label) { - Bnezc(rs, 2); - B(label); +void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label) { + Bcond(label, kCondF, static_cast<GpuRegister>(ft), ZERO); } -void Mips64Assembler::Bnezc(GpuRegister rs, Label* label) { - Beqzc(rs, 2); - B(label); +void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) { + Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO); } void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, @@ -1256,6 +1909,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); // Increase frame to required size. 
IncreaseFrameSize(frame_size); @@ -1298,6 +1952,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Mips64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); cfi_.RememberState(); // Pop callee saves and return address @@ -1316,6 +1971,7 @@ void Mips64Assembler::RemoveFrame(size_t frame_size, // Then jump to the return address. Jr(RA); + Nop(); // The CFI should be restored for any code that follows the exit block. cfi_.RestoreState(); @@ -1324,12 +1980,14 @@ void Mips64Assembler::RemoveFrame(size_t frame_size, void Mips64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); + DCHECK(!overwriting_); Daddiu64(SP, SP, static_cast<int32_t>(-adjust)); cfi_.AdjustCFAOffset(adjust); } void Mips64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); + DCHECK(!overwriting_); Daddiu64(SP, SP, static_cast<int32_t>(adjust)); cfi_.AdjustCFAOffset(-adjust); } @@ -1379,17 +2037,7 @@ void Mips64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value()); } -void Mips64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister mscratch) { - Mips64ManagedRegister scratch = mscratch.AsMips64(); - CHECK(scratch.IsGpuRegister()) << scratch; - // TODO: it's unclear wether 32 or 64 bits need to be stored (Arm64 and x86/x64 disagree?). - // Is this function even referenced anywhere else in the code? 
- LoadConst32(scratch.AsGpuRegister(), imm); - StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value()); -} - -void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, +void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); @@ -1398,7 +2046,7 @@ void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value()); } -void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) { +void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) { StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value()); } @@ -1415,7 +2063,9 @@ void Mips64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) return EmitLoad(mdest, SP, src.Int32Value(), size); } -void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) { +void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> src, + size_t size) { return EmitLoad(mdest, S1, src.Int32Value(), size); } @@ -1449,18 +2099,20 @@ void Mips64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, } void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, - ThreadOffset<8> offs) { + ThreadOffset<kMipsDoublewordSize> offs) { Mips64ManagedRegister dest = mdest.AsMips64(); CHECK(dest.IsGpuRegister()); LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value()); } -void Mips64Assembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no sign extension necessary for mips"; +void Mips64Assembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No sign extension necessary for MIPS64"; } -void 
Mips64Assembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips"; +void Mips64Assembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No zero extension necessary for MIPS64"; } void Mips64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) { @@ -1492,7 +2144,7 @@ void Mips64Assembler::CopyRef(FrameOffset dest, FrameOffset src, } void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, - ThreadOffset<8> thr_offs, + ThreadOffset<kMipsDoublewordSize> thr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; @@ -1500,7 +2152,7 @@ void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value()); } -void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs, +void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); @@ -1561,9 +2213,12 @@ void Mips64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameO } } -void Mips64Assembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, + FrameOffset src_base ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, @@ -1584,15 +2239,18 @@ void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, } } -void Mips64Assembler::Copy(FrameOffset /*dest*/, 
Offset /*dest_offset*/, FrameOffset /*src*/, Offset -/*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, + Offset dest_offset ATTRIBUTE_UNUSED, + FrameOffset src ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } -void Mips64Assembler::MemoryBarrier(ManagedRegister) { +void Mips64Assembler::MemoryBarrier(ManagedRegister mreg ATTRIBUTE_UNUSED) { // TODO: sync? - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, @@ -1604,7 +2262,7 @@ void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg; CHECK(out_reg.IsGpuRegister()) << out_reg; if (null_allowed) { - Label null_arg; + Mips64Label null_arg; // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is // the address in the handle scope holding the reference. // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset) @@ -1631,7 +2289,7 @@ void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off, Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; if (null_allowed) { - Label null_arg; + Mips64Label null_arg; LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); // Null values get a handle scope entry value of 0. 
Otherwise, the handle scope entry is @@ -1653,7 +2311,7 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, Mips64ManagedRegister in_reg = min_reg.AsMips64(); CHECK(out_reg.IsGpuRegister()) << out_reg; CHECK(in_reg.IsGpuRegister()) << in_reg; - Label null_arg; + Mips64Label null_arg; if (!out_reg.Equals(in_reg)) { LoadConst32(out_reg.AsGpuRegister(), 0); } @@ -1663,11 +2321,13 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, Bind(&null_arg); } -void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { +void Mips64Assembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { // TODO: not validating references } -void Mips64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { +void Mips64Assembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { // TODO: not validating references } @@ -1679,6 +2339,7 @@ void Mips64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), base.AsGpuRegister(), offset.Int32Value()); Jalr(scratch.AsGpuRegister()); + Nop(); // TODO: place reference map on call } @@ -1691,11 +2352,13 @@ void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscr LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), scratch.AsGpuRegister(), offset.Int32Value()); Jalr(scratch.AsGpuRegister()); + Nop(); // TODO: place reference map on call } -void Mips64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*mscratch*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::GetCurrentThread(ManagedRegister tr) { @@ -1703,37 +2366,39 @@ void 
Mips64Assembler::GetCurrentThread(ManagedRegister tr) { } void Mips64Assembler::GetCurrentThread(FrameOffset offset, - ManagedRegister /*mscratch*/) { + ManagedRegister mscratch ATTRIBUTE_UNUSED) { StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value()); } void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) { Mips64ManagedRegister scratch = mscratch.AsMips64(); - Mips64ExceptionSlowPath* slow = new Mips64ExceptionSlowPath(scratch, stack_adjust); - buffer_.EnqueueSlowPath(slow); - LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), - S1, Thread::ExceptionOffset<8>().Int32Value()); - Bnezc(scratch.AsGpuRegister(), slow->Entry()); -} - -void Mips64ExceptionSlowPath::Emit(Assembler* sasm) { - Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm); -#define __ sp_asm-> - __ Bind(&entry_); - if (stack_adjust_ != 0) { // Fix up the frame. - __ DecreaseFrameSize(stack_adjust_); + exception_blocks_.emplace_back(scratch, stack_adjust); + LoadFromOffset(kLoadDoubleword, + scratch.AsGpuRegister(), + S1, + Thread::ExceptionOffset<kMipsDoublewordSize>().Int32Value()); + Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry()); +} + +void Mips64Assembler::EmitExceptionPoll(Mips64ExceptionSlowPath* exception) { + Bind(exception->Entry()); + if (exception->stack_adjust_ != 0) { // Fix up the frame. + DecreaseFrameSize(exception->stack_adjust_); } - // Pass exception object as argument - // Don't care about preserving A0 as this call won't return - __ Move(A0, scratch_.AsGpuRegister()); + // Pass exception object as argument. + // Don't care about preserving A0 as this call won't return. 
+ CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); + Move(A0, exception->scratch_.AsGpuRegister()); // Set up call to Thread::Current()->pDeliverException - __ LoadFromOffset(kLoadDoubleword, T9, S1, - QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()); - // TODO: check T9 usage - __ Jr(T9); + LoadFromOffset(kLoadDoubleword, + T9, + S1, + QUICK_ENTRYPOINT_OFFSET(kMipsDoublewordSize, pDeliverException).Int32Value()); + Jr(T9); + Nop(); + // Call never returns - __ Break(); -#undef __ + Break(); } } // namespace mips64 diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 42962bca20..883f013f87 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -17,18 +17,22 @@ #ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ #define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ +#include <utility> #include <vector> #include "base/macros.h" #include "constants_mips64.h" #include "globals.h" #include "managed_register_mips64.h" -#include "utils/assembler.h" #include "offsets.h" +#include "utils/assembler.h" +#include "utils/label.h" namespace art { namespace mips64 { +static constexpr size_t kMipsDoublewordSize = 8; + enum LoadOperandType { kLoadSignedByte, kLoadUnsignedByte, @@ -60,10 +64,57 @@ enum FPClassMaskType { kPositiveZero = 0x200, }; +class Mips64Label : public Label { + public: + Mips64Label() : prev_branch_id_plus_one_(0) {} + + Mips64Label(Mips64Label&& src) + : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {} + + private: + uint32_t prev_branch_id_plus_one_; // To get distance from preceding branch, if any. + + friend class Mips64Assembler; + DISALLOW_COPY_AND_ASSIGN(Mips64Label); +}; + +// Slowpath entered when Thread::Current()->_exception is non-null. 
+class Mips64ExceptionSlowPath { + public: + explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) + : scratch_(scratch), stack_adjust_(stack_adjust) {} + + Mips64ExceptionSlowPath(Mips64ExceptionSlowPath&& src) + : scratch_(src.scratch_), + stack_adjust_(src.stack_adjust_), + exception_entry_(std::move(src.exception_entry_)) {} + + private: + Mips64Label* Entry() { return &exception_entry_; } + const Mips64ManagedRegister scratch_; + const size_t stack_adjust_; + Mips64Label exception_entry_; + + friend class Mips64Assembler; + DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath); +}; + class Mips64Assembler FINAL : public Assembler { public: - Mips64Assembler() {} - virtual ~Mips64Assembler() {} + Mips64Assembler() + : overwriting_(false), + overwrite_location_(0), + last_position_adjustment_(0), + last_old_position_(0), + last_branch_id_(0) { + cfi().DelayEmittingAdvancePCs(); + } + + virtual ~Mips64Assembler() { + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + } + } // Emit Machine Instructions. 
void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt); @@ -156,14 +207,12 @@ class Mips64Assembler FINAL : public Assembler { void Dclz(GpuRegister rd, GpuRegister rs); void Dclo(GpuRegister rd, GpuRegister rs); - void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16); - void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16); - void J(uint32_t addr26); - void Jal(uint32_t addr26); void Jalr(GpuRegister rd, GpuRegister rs); void Jalr(GpuRegister rs); void Jr(GpuRegister rs); void Auipc(GpuRegister rs, uint16_t imm16); + void Addiupc(GpuRegister rs, uint32_t imm19); + void Bc(uint32_t imm26); void Jic(GpuRegister rt, uint16_t imm16); void Jialc(GpuRegister rt, uint16_t imm16); void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16); @@ -178,6 +227,8 @@ class Mips64Assembler FINAL : public Assembler { void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16); void Beqzc(GpuRegister rs, uint32_t imm21); void Bnezc(GpuRegister rs, uint32_t imm21); + void Bc1eqz(FpuRegister ft, uint16_t imm16); + void Bc1nez(FpuRegister ft, uint16_t imm16); void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft); void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft); @@ -217,6 +268,26 @@ class Mips64Assembler FINAL : public Assembler { void MinD(FpuRegister fd, FpuRegister fs, FpuRegister ft); void MaxS(FpuRegister fd, FpuRegister fs, FpuRegister ft); void MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + 
void CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft); void Cvtsw(FpuRegister fd, FpuRegister fs); void Cvtdw(FpuRegister fd, FpuRegister fs); @@ -240,32 +311,36 @@ class Mips64Assembler FINAL : public Assembler { void Clear(GpuRegister rd); void Not(GpuRegister rd, GpuRegister rs); - // Higher level composite instructions + // Higher level composite instructions. void LoadConst32(GpuRegister rd, int32_t value); void LoadConst64(GpuRegister rd, int64_t value); // MIPS64 - void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT); void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64 - void Bind(Label* label) OVERRIDE; - void Jump(Label* label) OVERRIDE { - B(label); + void Bind(Label* label) OVERRIDE { + Bind(down_cast<Mips64Label*>(label)); + } + void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64"; } - void B(Label* label); - void Jalr(Label* label, GpuRegister indirect_reg = RA); - // TODO: implement common for R6 and non-R6 interface for conditional branches? 
- void Bltc(GpuRegister rs, GpuRegister rt, Label* label); - void Bltzc(GpuRegister rt, Label* label); - void Bgtzc(GpuRegister rt, Label* label); - void Bgec(GpuRegister rs, GpuRegister rt, Label* label); - void Bgezc(GpuRegister rt, Label* label); - void Blezc(GpuRegister rt, Label* label); - void Bltuc(GpuRegister rs, GpuRegister rt, Label* label); - void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label); - void Beqc(GpuRegister rs, GpuRegister rt, Label* label); - void Bnec(GpuRegister rs, GpuRegister rt, Label* label); - void Beqzc(GpuRegister rs, Label* label); - void Bnezc(GpuRegister rs, Label* label); + + void Bind(Mips64Label* label); + void Bc(Mips64Label* label); + void Jialc(Mips64Label* label, GpuRegister indirect_reg); + void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bltzc(GpuRegister rt, Mips64Label* label); + void Bgtzc(GpuRegister rt, Mips64Label* label); + void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bgezc(GpuRegister rt, Mips64Label* label); + void Blezc(GpuRegister rt, Mips64Label* label); + void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Beqzc(GpuRegister rs, Mips64Label* label); + void Bnezc(GpuRegister rs, Mips64Label* label); + void Bc1eqz(FpuRegister ft, Mips64Label* label); + void Bc1nez(FpuRegister ft, Mips64Label* label); void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); @@ -277,43 +352,42 @@ class Mips64Assembler FINAL : public Assembler { void Emit(uint32_t value); // - // Overridden common assembler high-level functionality + // Overridden common assembler high-level functionality. 
// - // Emit code that will create an activation on the stack + // Emit code that will create an activation on the stack. void BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; - // Emit code that will remove an activation from the stack + // Emit code that will remove an activation from the stack. void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE; void IncreaseFrameSize(size_t adjust) OVERRIDE; void DecreaseFrameSize(size_t adjust) OVERRIDE; - // Store routines + // Store routines. void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE; void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE; void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE; void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE; - void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister mscratch) OVERRIDE; - - void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, + void StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; - void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE; + void StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) OVERRIDE; void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off, ManagedRegister mscratch) OVERRIDE; - // Load routines + // Load routines. 
void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE; - void LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) OVERRIDE; + void LoadFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> src, + size_t size) OVERRIDE; void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; @@ -322,15 +396,16 @@ class Mips64Assembler FINAL : public Assembler { void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE; - void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) OVERRIDE; + void LoadRawPtrFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> offs) OVERRIDE; - // Copying routines + // Copying routines. void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE; - void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs, + void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMipsDoublewordSize> thr_offs, ManagedRegister mscratch) OVERRIDE; - void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, + void CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE; @@ -354,13 +429,13 @@ class Mips64Assembler FINAL : public Assembler { void MemoryBarrier(ManagedRegister) OVERRIDE; - // Sign extension + // Sign extension. void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; - // Zero extension + // Zero extension. void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; - // Exploit fast access in managed code to Thread::Current() + // Exploit fast access in managed code to Thread::Current(). 
void GetCurrentThread(ManagedRegister tr) OVERRIDE; void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE; @@ -376,7 +451,7 @@ class Mips64Assembler FINAL : public Assembler { void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister mscratch, bool null_allowed) OVERRIDE; - // src holds a handle scope entry (Object**) load this into dst + // src holds a handle scope entry (Object**) load this into dst. void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; // Heap::VerifyObject on src. In some cases (such as a reference to this) we @@ -384,37 +459,255 @@ class Mips64Assembler FINAL : public Assembler { void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; - // Call to address held at [base+offset] + // Call to address held at [base+offset]. void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE; void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE; - void CallFromThread64(ThreadOffset<8> offset, ManagedRegister mscratch) OVERRIDE; + void CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset, + ManagedRegister mscratch) OVERRIDE; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE; + // Emit slow paths queued during assembly and promote short branches to long if needed. + void FinalizeCode() OVERRIDE; + + // Emit branches and finalize all instructions. + void FinalizeInstructions(const MemoryRegion& region); + + // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS64, + // must be used instead of Mips64Label::GetPosition()). 
+ uint32_t GetLabelLocation(Mips64Label* label) const; + + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position); + + enum BranchCondition { + kCondLT, + kCondGE, + kCondLE, + kCondGT, + kCondLTZ, + kCondGEZ, + kCondLEZ, + kCondGTZ, + kCondEQ, + kCondNE, + kCondEQZ, + kCondNEZ, + kCondLTU, + kCondGEU, + kCondF, // Floating-point predicate false. + kCondT, // Floating-point predicate true. + kUncond, + }; + friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs); + private: + class Branch { + public: + enum Type { + // Short branches. + kUncondBranch, + kCondBranch, + kCall, + // Long branches. + kLongUncondBranch, + kLongCondBranch, + kLongCall, + }; + + // Bit sizes of offsets defined as enums to minimize chance of typos. + enum OffsetBits { + kOffset16 = 16, + kOffset18 = 18, + kOffset21 = 21, + kOffset23 = 23, + kOffset28 = 28, + kOffset32 = 32, + }; + + static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_ + static constexpr int32_t kMaxBranchLength = 32; + static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t); + + struct BranchInfo { + // Branch length as a number of 4-byte-long instructions. + uint32_t length; + // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's + // PC-relative offset (or its most significant 16-bit half, which goes first). + uint32_t instr_offset; + // Different MIPS instructions with PC-relative offsets apply said offsets to slightly + // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte + // instructions) from the instruction containing the offset. + uint32_t pc_org; + // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is + // an exception: use kOffset23 for beqzc/bnezc). 
+ OffsetBits offset_size; + // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift + // count. + int offset_shift; + }; + static const BranchInfo branch_info_[/* Type */]; + + // Unconditional branch. + Branch(uint32_t location, uint32_t target); + // Conditional branch. + Branch(uint32_t location, + uint32_t target, + BranchCondition condition, + GpuRegister lhs_reg, + GpuRegister rhs_reg = ZERO); + // Call (branch and link) that stores the target address in a given register (i.e. T9). + Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg); + + // Some conditional branches with lhs = rhs are effectively NOPs, while some + // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs. + // So, we need a way to identify such branches in order to emit no instructions for them + // or change them to unconditional. + static bool IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs); + static bool IsUncond(BranchCondition condition, GpuRegister lhs, GpuRegister rhs); + + static BranchCondition OppositeCondition(BranchCondition cond); + + Type GetType() const; + BranchCondition GetCondition() const; + GpuRegister GetLeftRegister() const; + GpuRegister GetRightRegister() const; + uint32_t GetTarget() const; + uint32_t GetLocation() const; + uint32_t GetOldLocation() const; + uint32_t GetLength() const; + uint32_t GetOldLength() const; + uint32_t GetSize() const; + uint32_t GetOldSize() const; + uint32_t GetEndLocation() const; + uint32_t GetOldEndLocation() const; + bool IsLong() const; + bool IsResolved() const; + + // Returns the bit size of the signed offset that the branch instruction can handle. + OffsetBits GetOffsetSize() const; + + // Calculates the distance between two byte locations in the assembler buffer and + // returns the number of bits needed to represent the distance as a signed integer. 
+ // + // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc), + // and 26 (bc) bits, which are additionally shifted left 2 positions at run time. + // + // Composite branches (made of several instructions) with longer reach have 32-bit + // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first). + // The composite branches cover the range of PC + ~+/-2GB. The range is not end-to-end, + // however. Consider the following implementation of a long unconditional branch, for + // example: + // + // auipc at, offset_31_16 // at = pc + sign_extend(offset_31_16) << 16 + // jic at, offset_15_0 // pc = at + sign_extend(offset_15_0) + // + // Both of the above instructions take 16-bit signed offsets as immediate operands. + // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000 + // due to sign extension. This must be compensated for by incrementing offset_31_16 + // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is + // 0x7FFF, adding 1 will overflow the positive offset into the negative range. + // Therefore, the long branch range is something like from PC - 0x80000000 to + // PC + 0x7FFF7FFF, IOW, shorter by 32KB on one side. + // + // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special + // case with the addiu instruction and a 16 bit offset. + static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target); + + // Resolve a branch when the target is known. + void Resolve(uint32_t target); + + // Relocate a branch by a given delta if needed due to expansion of this or another + // branch at a given location by this delta (just changes location_ and target_). + void Relocate(uint32_t expand_location, uint32_t delta); + + // If the branch is short, changes its type to long. + void PromoteToLong(); + + // If necessary, updates the type by promoting a short branch to a long branch + // based on the branch location and target. 
Returns the amount (in bytes) by + // which the branch size has increased. + // max_short_distance caps the maximum distance between location_ and target_ + // that is allowed for short branches. This is for debugging/testing purposes. + // max_short_distance = 0 forces all short branches to become long. + // Use the implicit default argument when not debugging/testing. + uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); + + // Returns the location of the instruction(s) containing the offset. + uint32_t GetOffsetLocation() const; + + // Calculates and returns the offset ready for encoding in the branch instruction(s). + uint32_t GetOffset() const; + + private: + // Completes branch construction by determining and recording its type. + void InitializeType(bool is_call); + // Helper for the above. + void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); + + uint32_t old_location_; // Offset into assembler buffer in bytes. + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + + GpuRegister lhs_reg_; // Left-hand side register in conditional branches or + // indirect call register. + GpuRegister rhs_reg_; // Right-hand side register in conditional branches. + BranchCondition condition_; // Condition for conditional branches. + + Type type_; // Current type of the branch. + Type old_type_; // Initial type of the branch. 
+ }; + friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs); + friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs); + void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct); void EmitRsd(int opcode, GpuRegister rs, GpuRegister rd, int shamt, int funct); void EmitRtd(int opcode, GpuRegister rt, GpuRegister rd, int shamt, int funct); void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm); void EmitI21(int opcode, GpuRegister rs, uint32_t imm21); - void EmitJ(int opcode, uint32_t addr26); + void EmitI26(int opcode, uint32_t imm26); void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct); void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm); + void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21); - DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); -}; + void Buncond(Mips64Label* label); + void Bcond(Mips64Label* label, + BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs = ZERO); + void Call(Mips64Label* label, GpuRegister indirect_reg); + void FinalizeLabeledBranch(Mips64Label* label); -// Slowpath entered when Thread::Current()->_exception is non-null -class Mips64ExceptionSlowPath FINAL : public SlowPath { - public: - Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) - : scratch_(scratch), stack_adjust_(stack_adjust) {} - virtual void Emit(Assembler *sp_asm) OVERRIDE; - private: - const Mips64ManagedRegister scratch_; - const size_t stack_adjust_; + Branch* GetBranch(uint32_t branch_id); + const Branch* GetBranch(uint32_t branch_id) const; + + void PromoteBranches(); + void EmitBranch(Branch* branch); + void EmitBranches(); + void PatchCFI(); + + // Emits exception block. + void EmitExceptionPoll(Mips64ExceptionSlowPath* exception); + + // List of exception blocks to generate at the end of the code cache. 
+ std::vector<Mips64ExceptionSlowPath> exception_blocks_; + + std::vector<Branch> branches_; + + // Whether appending instructions at the end of the buffer or overwriting the existing ones. + bool overwriting_; + // The current overwrite location. + uint32_t overwrite_location_; + + // Data for AdjustedPosition(), see the description there. + uint32_t last_position_adjustment_; + uint32_t last_old_position_; + uint32_t last_branch_id_; + + DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); }; } // namespace mips64 diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 4413906fd7..bac4375b35 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -24,6 +24,8 @@ #include "base/stl_util.h" #include "utils/assembler_test.h" +#define __ GetAssembler()-> + namespace art { struct MIPS64CpuRegisterCompare { @@ -48,8 +50,26 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return "mips64"; } + std::string GetAssemblerCmdName() OVERRIDE { + // We assemble and link for MIPS64R6. See GetAssemblerParameters() for details. + return "gcc"; + } + std::string GetAssemblerParameters() OVERRIDE { - return " --no-warn -march=mips64r6"; + // We assemble and link for MIPS64R6. The reason is that object files produced for MIPS64R6 + // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative + // branches in the .text section and so they require a relocation pass (there's a relocation + // section, .rela.text, that has the needed info to fix up the branches). + return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib"; + } + + void Pad(std::vector<uint8_t>& data) OVERRIDE { + // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple + // of 16 and there doesn't appear to be a way to suppress this padding. 
Our assembler doesn't + // pad, so, in order for two assembler outputs to match, we need to match the padding as well. + // NOP is encoded as four zero bytes on MIPS. + size_t pad_size = RoundUp(data.size(), 16u) - data.size(); + data.insert(data.end(), pad_size, 0); } std::string GetDisassembleParameters() OVERRIDE { @@ -182,6 +202,71 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return secondary_register_names_[reg]; } + std::string RepeatInsn(size_t count, const std::string& insn) { + std::string result; + for (; count != 0u; --count) { + result += insn; + } + return result; + } + + void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, + mips64::Mips64Label*), + std::string instr_name) { + mips64::Mips64Label label; + (Base::GetAssembler()->*f)(mips64::A0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + (Base::GetAssembler()->*f)(mips64::A1, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a1, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + + void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, + mips64::GpuRegister, + mips64::Mips64Label*), + std::string instr_name) { + mips64::Mips64Label label; + (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ 
Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, $a1, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a2, $a3, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + private: std::vector<mips64::GpuRegister*> registers_; std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_; @@ -194,7 +279,6 @@ TEST_F(AssemblerMIPS64Test, Toolchain) { EXPECT_TRUE(CheckTools()); } - /////////////////// // FP Operations // /////////////////// @@ -319,6 +403,106 @@ TEST_F(AssemblerMIPS64Test, MaxD) { DriverStr(RepeatFFF(&mips64::Mips64Assembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d"); } +TEST_F(AssemblerMIPS64Test, CmpUnS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnS, "cmp.un.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.un.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpEqS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqS, "cmp.eq.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.eq.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUeqS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqS, "cmp.ueq.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ueq.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpLtS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtS, "cmp.lt.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.lt.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUltS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltS, "cmp.ult.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ult.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpLeS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeS, "cmp.le.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.le.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUleS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleS, "cmp.ule.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ule.s"); +} + 
+TEST_F(AssemblerMIPS64Test, CmpOrS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrS, "cmp.or.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.or.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUneS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneS, "cmp.une.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.une.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpNeS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeS, "cmp.ne.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ne.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUnD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnD, "cmp.un.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.un.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpEqD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqD, "cmp.eq.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.eq.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUeqD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqD, "cmp.ueq.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ueq.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpLtD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtD, "cmp.lt.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.lt.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUltD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltD, "cmp.ult.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ult.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpLeD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeD, "cmp.le.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.le.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUleD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleD, "cmp.ule.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ule.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpOrD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrD, "cmp.or.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.or.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUneD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneD, "cmp.une.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.une.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpNeD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeD, "cmp.ne.d ${reg1}, ${reg2}, 
${reg3}"), + "cmp.ne.d"); +} + TEST_F(AssemblerMIPS64Test, CvtDL) { DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "cvt.d.l"); } @@ -348,7 +532,255 @@ TEST_F(AssemblerMIPS64Test, CvtSW) { //////////////// TEST_F(AssemblerMIPS64Test, Jalr) { - DriverStr(RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); + DriverStr(".set noreorder\n" + + RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); +} + +TEST_F(AssemblerMIPS64Test, Jialc) { + mips64::Mips64Label label1, label2; + __ Jialc(&label1, mips64::T9); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Jialc(&label2, mips64::T9); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Jialc(&label1, mips64::T9); + + std::string expected = + ".set noreorder\n" + "lapc $t9, 1f\n" + "jialc $t9, 0\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "lapc $t9, 2f\n" + "jialc $t9, 0\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "lapc $t9, 1b\n" + "jialc $t9, 0\n"; + DriverStr(expected, "Jialc"); +} + +TEST_F(AssemblerMIPS64Test, LongJialc) { + mips64::Mips64Label label1, label2; + __ Jialc(&label1, mips64::T9); + constexpr uint32_t kAdduCount1 = (1u << 18) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Jialc(&label2, mips64::T9); + constexpr uint32_t kAdduCount2 = (1u << 18) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Jialc(&label1, mips64::T9); + + uint32_t offset_forward1 = 3 + kAdduCount1; // 3: account for auipc, daddiu and jic. 
+ offset_forward1 <<= 2; + offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in daddiu. + + uint32_t offset_forward2 = 3 + kAdduCount2; // 3: account for auipc, daddiu and jic. + offset_forward2 <<= 2; + offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in daddiu. + + uint32_t offset_back = -(3 + kAdduCount2); // 3: account for auipc, daddiu and jic. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in daddiu. + + std::ostringstream oss; + oss << + ".set noreorder\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_forward1) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward1) << "\n" + "jialc $t9, 0\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "1:\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_forward2) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward2) << "\n" + "jialc $t9, 0\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "2:\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_back) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "jialc $t9, 0\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongJialc"); +} + +TEST_F(AssemblerMIPS64Test, Bc) { + mips64::Mips64Label label1, label2; + __ Bc(&label1); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Bc(&label2); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Bc(&label1); + + std::string expected = + ".set noreorder\n" + "bc 1f\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "bc 2f\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "bc 1b\n"; + DriverStr(expected, "Bc"); +} + +TEST_F(AssemblerMIPS64Test, 
Beqzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc"); +} + +TEST_F(AssemblerMIPS64Test, Bnezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc"); +} + +TEST_F(AssemblerMIPS64Test, Bltzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc"); +} + +TEST_F(AssemblerMIPS64Test, Bgezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc"); +} + +TEST_F(AssemblerMIPS64Test, Blezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc"); +} + +TEST_F(AssemblerMIPS64Test, Bgtzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc"); +} + +TEST_F(AssemblerMIPS64Test, Beqc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc"); +} + +TEST_F(AssemblerMIPS64Test, Bnec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec"); +} + +TEST_F(AssemblerMIPS64Test, Bltc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc"); +} + +TEST_F(AssemblerMIPS64Test, Bgec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec"); +} + +TEST_F(AssemblerMIPS64Test, Bltuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc"); +} + +TEST_F(AssemblerMIPS64Test, Bgeuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc"); +} + +TEST_F(AssemblerMIPS64Test, Bc1eqz) { + mips64::Mips64Label label; + __ Bc1eqz(mips64::F0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bc1eqz(mips64::F31, &label); + + std::string expected = + ".set noreorder\n" + "bc1eqz $f0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "bc1eqz $f31, 1b\n" + "nop\n"; + DriverStr(expected, 
"Bc1eqz"); +} + +TEST_F(AssemblerMIPS64Test, Bc1nez) { + mips64::Mips64Label label; + __ Bc1nez(mips64::F0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bc1nez(mips64::F31, &label); + + std::string expected = + ".set noreorder\n" + "bc1nez $f0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "bc1nez $f31, 1b\n" + "nop\n"; + DriverStr(expected, "Bc1nez"); +} + +TEST_F(AssemblerMIPS64Test, LongBeqc) { + mips64::Mips64Label label; + __ Beqc(mips64::A0, mips64::A1, &label); + constexpr uint32_t kAdduCount1 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr uint32_t kAdduCount2 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Beqc(mips64::A2, mips64::A3, &label); + + uint32_t offset_forward = 2 + kAdduCount1; // 2: account for auipc and jic. + offset_forward <<= 2; + offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic. + + uint32_t offset_back = -(kAdduCount2 + 1); // 1: account for bnec. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic. 
+ + std::ostringstream oss; + oss << + ".set noreorder\n" + "bnec $a0, $a1, 1f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" + "1:\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "2:\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "bnec $a2, $a3, 3f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "3:\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBeqc"); } ////////// |