diff options
Diffstat (limited to 'compiler/utils')
| -rw-r--r-- | compiler/utils/arm/assembler_arm.cc | 8 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_arm32.h | 2 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_thumb2.cc | 129 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_thumb2.h | 14 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_thumb2_test.cc | 40 | ||||
| -rw-r--r-- | compiler/utils/assembler_test.h | 5 | ||||
| -rw-r--r-- | compiler/utils/assembler_thumb_test.cc | 103 | ||||
| -rw-r--r-- | compiler/utils/assembler_thumb_test_expected.cc.inc | 142 | ||||
| -rw-r--r-- | compiler/utils/mips/assembler_mips.cc | 12 | ||||
| -rw-r--r-- | compiler/utils/mips/assembler_mips.h | 2 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64.cc | 1007 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64.h | 387 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64_test.cc | 286 |
13 files changed, 1763 insertions, 374 deletions
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index 68e39568bb..dead8fd9a8 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -342,9 +342,9 @@ bool Address::CanHoldLoadOffsetThumb(LoadOperandType type, int offset) { return IsAbsoluteUint<12>(offset); case kLoadSWord: case kLoadDWord: - return IsAbsoluteUint<10>(offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; // VFP addressing mode. case kLoadWordPair: - return IsAbsoluteUint<10>(offset); + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -360,9 +360,9 @@ bool Address::CanHoldStoreOffsetThumb(StoreOperandType type, int offset) { return IsAbsoluteUint<12>(offset); case kStoreSWord: case kStoreDWord: - return IsAbsoluteUint<10>(offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; // VFP addressing mode. case kStoreWordPair: - return IsAbsoluteUint<10>(offset); + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 5233dcbbb0..ce3a87275d 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -389,8 +389,6 @@ class Arm32Assembler FINAL : public ArmAssembler { void EmitBranch(Condition cond, Label* label, bool link); static int32_t EncodeBranchOffset(int offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); - int32_t EncodeTstOffset(int offset, int32_t inst); - int DecodeTstOffset(int32_t inst); bool ShifterOperandCanHoldArm32(uint32_t immediate, ShifterOperand* shifter_op); }; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 297cc54e29..7ad5b440e0 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ 
-1349,7 +1349,8 @@ void Thumb2Assembler::Emit32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED, int32_t encoding = 0; if (so.IsImmediate()) { // Check special cases. - if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12))) { + if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12)) && + /* Prefer T3 encoding to T4. */ !ShifterOperandCanAlwaysHold(so.GetImmediate())) { if (set_cc != kCcSet) { if (opcode == SUB) { thumb_opcode = 5U; @@ -3220,7 +3221,7 @@ void Thumb2Assembler::Ror(Register rd, Register rm, uint32_t shift_imm, void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond, SetCc set_cc) { CheckCondition(cond); - EmitShift(rd, rm, RRX, rm, cond, set_cc); + EmitShift(rd, rm, RRX, 0, cond, set_cc); } @@ -3469,6 +3470,73 @@ void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) } } +int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) { + switch (type) { + case kLoadSignedByte: + case kLoadSignedHalfword: + case kLoadUnsignedHalfword: + case kLoadUnsignedByte: + case kLoadWord: + // We can encode imm12 offset. + return 0xfffu; + case kLoadSWord: + case kLoadDWord: + case kLoadWordPair: + // We can encode imm8:'00' offset. + return 0xff << 2; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +int32_t Thumb2Assembler::GetAllowedStoreOffsetBits(StoreOperandType type) { + switch (type) { + case kStoreHalfword: + case kStoreByte: + case kStoreWord: + // We can encode imm12 offset. + return 0xfff; + case kStoreSWord: + case kStoreDWord: + case kStoreWordPair: + // We can encode imm8:'00' offset. 
+ return 0xff << 2; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +bool Thumb2Assembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits, + int32_t offset, + /*out*/ int32_t* add_to_base, + /*out*/ int32_t* offset_for_load_store) { + int32_t other_bits = offset & ~allowed_offset_bits; + if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) { + *add_to_base = offset & ~allowed_offset_bits; + *offset_for_load_store = offset & allowed_offset_bits; + return true; + } + return false; +} + +int32_t Thumb2Assembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits, + Register temp, + Register base, + int32_t offset, + Condition cond) { + DCHECK_NE(offset & ~allowed_offset_bits, 0); + int32_t add_to_base, offset_for_load; + if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) { + AddConstant(temp, base, add_to_base, cond, kCcKeep); + return offset_for_load; + } else { + LoadImmediate(temp, offset, cond); + add(temp, temp, ShifterOperand(base), cond, kCcKeep); + return 0; + } +} // Implementation note: this method must emit at most one instruction when // Address::CanHoldLoadOffsetThumb. @@ -3479,12 +3547,26 @@ void Thumb2Assembler::LoadFromOffset(LoadOperandType type, Condition cond) { if (!Address::CanHoldLoadOffsetThumb(type, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); - base = IP; - offset = 0; + // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks. + int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type); + DCHECK_NE(offset & ~allowed_offset_bits, 0); + int32_t add_to_base, offset_for_load; + if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) { + // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load. 
+ AddConstant(reg, base, add_to_base, cond, kCcKeep); + base = reg; + offset = offset_for_load; + } else { + Register temp = (reg == base) ? IP : reg; + LoadImmediate(temp, offset, cond); + // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD. + // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load. + add(reg, reg, ShifterOperand((reg == base) ? IP : base), cond, kCcKeep); + base = reg; + offset = 0; + } } - CHECK(Address::CanHoldLoadOffsetThumb(type, offset)); + DCHECK(Address::CanHoldLoadOffsetThumb(type, offset)); switch (type) { case kLoadSignedByte: ldrsb(reg, Address(base, offset), cond); @@ -3510,7 +3592,6 @@ void Thumb2Assembler::LoadFromOffset(LoadOperandType type, } } - // Implementation note: this method must emit at most one instruction when // Address::CanHoldLoadOffsetThumb, as expected by JIT::GuardedLoadFromOffset. void Thumb2Assembler::LoadSFromOffset(SRegister reg, @@ -3519,12 +3600,10 @@ void Thumb2Assembler::LoadSFromOffset(SRegister reg, Condition cond) { if (!Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadSWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)); + DCHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)); vldrs(reg, Address(base, offset), cond); } @@ -3537,12 +3616,10 @@ void Thumb2Assembler::LoadDFromOffset(DRegister reg, Condition cond) { if (!Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadDWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)); + DCHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, 
offset)); vldrd(reg, Address(base, offset), cond); } @@ -3573,12 +3650,12 @@ void Thumb2Assembler::StoreToOffset(StoreOperandType type, offset += kRegisterSize; } } - LoadImmediate(tmp_reg, offset, cond); - add(tmp_reg, tmp_reg, ShifterOperand(base), AL); + // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset() + // and in the "unsplittable" path get rid of the "add" by using the store indexed instead. + offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset, cond); base = tmp_reg; - offset = 0; } - CHECK(Address::CanHoldStoreOffsetThumb(type, offset)); + DCHECK(Address::CanHoldStoreOffsetThumb(type, offset)); switch (type) { case kStoreByte: strb(reg, Address(base, offset), cond); @@ -3611,12 +3688,10 @@ void Thumb2Assembler::StoreSToOffset(SRegister reg, Condition cond) { if (!Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreSWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)); + DCHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)); vstrs(reg, Address(base, offset), cond); } @@ -3629,12 +3704,10 @@ void Thumb2Assembler::StoreDToOffset(DRegister reg, Condition cond) { if (!Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreDWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)); + DCHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)); vstrd(reg, Address(base, offset), cond); } diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index e18361300a..9aeece8e57 100644 --- 
a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -729,13 +729,23 @@ class Thumb2Assembler FINAL : public ArmAssembler { void EmitBranch(Condition cond, Label* label, bool link, bool x); static int32_t EncodeBranchOffset(int32_t offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); - int32_t EncodeTstOffset(int offset, int32_t inst); - int DecodeTstOffset(int32_t inst); void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount, Condition cond = AL, SetCc set_cc = kCcDontCare); void EmitShift(Register rd, Register rn, Shift shift, Register rm, Condition cond = AL, SetCc set_cc = kCcDontCare); + static int32_t GetAllowedLoadOffsetBits(LoadOperandType type); + static int32_t GetAllowedStoreOffsetBits(StoreOperandType type); + bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits, + int32_t offset, + /*out*/ int32_t* add_to_base, + /*out*/ int32_t* offset_for_load_store); + int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits, + Register temp, + Register base, + int32_t offset, + Condition cond); + // Whether the assembler can relocate branches. If false, unresolved branches will be // emitted on 32bits. 
bool can_relocate_branches_; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index cb4b20b5ba..7b32b0fd26 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -243,7 +243,7 @@ TEST_F(AssemblerThumb2Test, sub) { const char* expected = "subs r1, r0, #42\n" - "subw r1, r0, #42\n" + "sub.w r1, r0, #42\n" "subs r1, r0, r2, asr #31\n" "sub r1, r0, r2, asr #31\n"; DriverStr(expected, "sub"); @@ -257,7 +257,7 @@ TEST_F(AssemblerThumb2Test, add) { const char* expected = "adds r1, r0, #42\n" - "addw r1, r0, #42\n" + "add.w r1, r0, #42\n" "adds r1, r0, r2, asr #31\n" "add r1, r0, r2, asr #31\n"; DriverStr(expected, "add"); @@ -305,21 +305,18 @@ TEST_F(AssemblerThumb2Test, StoreWordToNonThumbOffset) { __ StoreToOffset(type, arm::IP, arm::R5, offset); const char* expected = - "mov ip, #4096\n" // LoadImmediate(ip, 4096) - "add ip, ip, sp\n" + "add.w ip, sp, #4096\n" // AddConstant(ip, sp, 4096) "str r0, [ip, #0]\n" - "str r5, [sp, #-4]!\n" // Push(r5) - "movw r5, #4100\n" // LoadImmediate(r5, 4096 + kRegisterSize) - "add r5, r5, sp\n" - "str ip, [r5, #0]\n" - "ldr r5, [sp], #4\n" // Pop(r5) - - "str r6, [sp, #-4]!\n" // Push(r6) - "mov r6, #4096\n" // LoadImmediate(r6, 4096) - "add r6, r6, r5\n" - "str ip, [r6, #0]\n" - "ldr r6, [sp], #4\n"; // Pop(r6) + "str r5, [sp, #-4]!\n" // Push(r5) + "add.w r5, sp, #4096\n" // AddConstant(r5, 4100 & ~0xfff) + "str ip, [r5, #4]\n" // StoreToOffset(type, ip, r5, 4100 & 0xfff) + "ldr r5, [sp], #4\n" // Pop(r5) + + "str r6, [sp, #-4]!\n" // Push(r6) + "add.w r6, r5, #4096\n" // AddConstant(r6, r5, 4096 & ~0xfff) + "str ip, [r6, #0]\n" // StoreToOffset(type, ip, r6, 4096 & 0xfff) + "ldr r6, [sp], #4\n"; // Pop(r6) DriverStr(expected, "StoreWordToNonThumbOffset"); } @@ -360,20 +357,17 @@ TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) { __ StoreToOffset(type, arm::R11, arm::R5, offset); const char* expected = - "mov ip, 
#1024\n" // LoadImmediate(ip, 1024) - "add ip, ip, sp\n" + "add.w ip, sp, #1024\n" // AddConstant(ip, sp, 1024) "strd r0, r1, [ip, #0]\n" "str r5, [sp, #-4]!\n" // Push(r5) - "movw r5, #1028\n" // LoadImmediate(r5, 1024 + kRegisterSize) - "add r5, r5, sp\n" - "strd r11, ip, [r5, #0]\n" + "add.w r5, sp, #1024\n" // AddConstant(r5, sp, (1024 + kRegisterSize) & ~0x3fc) + "strd r11, ip, [r5, #4]\n" // StoreToOffset(type, r11, sp, (1024 + kRegisterSize) & 0x3fc) "ldr r5, [sp], #4\n" // Pop(r5) "str r6, [sp, #-4]!\n" // Push(r6) - "mov r6, #1024\n" // LoadImmediate(r6, 1024) - "add r6, r6, r5\n" - "strd r11, ip, [r6, #0]\n" + "add.w r6, r5, #1024\n" // AddConstant(r6, r5, 1024 & ~0x3fc) + "strd r11, ip, [r6, #0]\n" // StoreToOffset(type, r11, r6, 1024 & 0x3fc) "ldr r6, [sp], #4\n"; // Pop(r6) DriverStr(expected, "StoreWordPairToNonThumbOffset"); } diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index f1233ca457..9457da1c36 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -840,12 +840,17 @@ class AssemblerTest : public testing::Test { return str; } + // Override this to pad the code with NOPs to a certain size if needed. 
+ virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) { + } + void DriverWrapper(std::string assembly_text, std::string test_name) { assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); assembler_->FinalizeInstructions(code); + Pad(*data); test_helper_->Driver(*data, assembly_text, test_name); } diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 2ae88413e7..1de51a2dc8 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -466,6 +466,38 @@ TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) { EmitAndCheck(&assembler, "DataProcessingShiftedRegister"); } +TEST(Thumb2AssemblerTest, ShiftImmediate) { + // Note: This test produces the same results as DataProcessingShiftedRegister + // but it does so using shift functions instead of mov(). + arm::Thumb2Assembler assembler; + + // 16-bit variants. + __ Lsl(R3, R4, 4); + __ Lsr(R3, R4, 5); + __ Asr(R3, R4, 6); + + // 32-bit ROR because ROR immediate doesn't have the same 16-bit version as other shifts. + __ Ror(R3, R4, 7); + + // 32-bit RRX because RRX has no 16-bit version. + __ Rrx(R3, R4); + + // 32 bit variants (not setting condition codes). + __ Lsl(R3, R4, 4, AL, kCcKeep); + __ Lsr(R3, R4, 5, AL, kCcKeep); + __ Asr(R3, R4, 6, AL, kCcKeep); + __ Ror(R3, R4, 7, AL, kCcKeep); + __ Rrx(R3, R4, AL, kCcKeep); + + // 32 bit variants (high registers). + __ Lsls(R8, R4, 4); + __ Lsrs(R8, R4, 5); + __ Asrs(R8, R4, 6); + __ Rors(R8, R4, 7); + __ Rrxs(R8, R4); + + EmitAndCheck(&assembler, "ShiftImmediate"); +} TEST(Thumb2AssemblerTest, BasicLoad) { arm::Thumb2Assembler assembler; @@ -823,29 +855,80 @@ TEST(Thumb2AssemblerTest, SpecialAddSub) { __ add(R2, SP, ShifterOperand(0xf00)); // 32 bit due to imm size. __ add(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size. 
+ __ add(SP, SP, ShifterOperand(0xffc)); // 32 bit due to imm size; encoding T4. - __ sub(SP, SP, ShifterOperand(0x50)); // 16 bit - __ sub(R0, SP, ShifterOperand(0x50)); // 32 bit - __ sub(R8, SP, ShifterOperand(0x50)); // 32 bit. + __ sub(SP, SP, ShifterOperand(0x50)); // 16 bit + __ sub(R0, SP, ShifterOperand(0x50)); // 32 bit + __ sub(R8, SP, ShifterOperand(0x50)); // 32 bit. - __ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size + __ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size + __ sub(SP, SP, ShifterOperand(0xffc)); // 32 bit due to imm size; encoding T4. EmitAndCheck(&assembler, "SpecialAddSub"); } +TEST(Thumb2AssemblerTest, LoadFromOffset) { + arm::Thumb2Assembler assembler; + + __ LoadFromOffset(kLoadWord, R2, R4, 12); + __ LoadFromOffset(kLoadWord, R2, R4, 0xfff); + __ LoadFromOffset(kLoadWord, R2, R4, 0x1000); + __ LoadFromOffset(kLoadWord, R2, R4, 0x1000a4); + __ LoadFromOffset(kLoadWord, R2, R4, 0x101000); + __ LoadFromOffset(kLoadWord, R4, R4, 0x101000); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 12); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0xfff); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000a4); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x101000); + __ LoadFromOffset(kLoadUnsignedHalfword, R4, R4, 0x101000); + __ LoadFromOffset(kLoadWordPair, R2, R4, 12); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x3fc); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400a4); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x40400); + __ LoadFromOffset(kLoadWordPair, R4, R4, 0x40400); + + __ LoadFromOffset(kLoadWord, R0, R12, 12); // 32-bit because of R12. 
+ __ LoadFromOffset(kLoadWord, R2, R4, 0xa4 - 0x100000); + + __ LoadFromOffset(kLoadSignedByte, R2, R4, 12); + __ LoadFromOffset(kLoadUnsignedByte, R2, R4, 12); + __ LoadFromOffset(kLoadSignedHalfword, R2, R4, 12); + + EmitAndCheck(&assembler, "LoadFromOffset"); +} + TEST(Thumb2AssemblerTest, StoreToOffset) { arm::Thumb2Assembler assembler; - __ StoreToOffset(kStoreWord, R2, R4, 12); // Simple - __ StoreToOffset(kStoreWord, R2, R4, 0x2000); // Offset too big. - __ StoreToOffset(kStoreWord, R0, R12, 12); - __ StoreToOffset(kStoreHalfword, R0, R12, 12); - __ StoreToOffset(kStoreByte, R2, R12, 12); + __ StoreToOffset(kStoreWord, R2, R4, 12); + __ StoreToOffset(kStoreWord, R2, R4, 0xfff); + __ StoreToOffset(kStoreWord, R2, R4, 0x1000); + __ StoreToOffset(kStoreWord, R2, R4, 0x1000a4); + __ StoreToOffset(kStoreWord, R2, R4, 0x101000); + __ StoreToOffset(kStoreWord, R4, R4, 0x101000); + __ StoreToOffset(kStoreHalfword, R2, R4, 12); + __ StoreToOffset(kStoreHalfword, R2, R4, 0xfff); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000a4); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x101000); + __ StoreToOffset(kStoreHalfword, R4, R4, 0x101000); + __ StoreToOffset(kStoreWordPair, R2, R4, 12); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x3fc); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x400); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x400a4); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x40400); + __ StoreToOffset(kStoreWordPair, R4, R4, 0x40400); + + __ StoreToOffset(kStoreWord, R0, R12, 12); // 32-bit because of R12. 
+ __ StoreToOffset(kStoreWord, R2, R4, 0xa4 - 0x100000); + + __ StoreToOffset(kStoreByte, R2, R4, 12); EmitAndCheck(&assembler, "StoreToOffset"); } - TEST(Thumb2AssemblerTest, IfThen) { arm::Thumb2Assembler assembler; diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index b79c2e46f0..9246c827a7 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -132,8 +132,8 @@ const char* DataProcessingRegisterResults[] = { const char* DataProcessingImmediateResults[] = { " 0: 2055 movs r0, #85 ; 0x55\n", " 2: f06f 0055 mvn.w r0, #85 ; 0x55\n", - " 6: f201 0055 addw r0, r1, #85 ; 0x55\n", - " a: f2a1 0055 subw r0, r1, #85 ; 0x55\n", + " 6: f101 0055 add.w r0, r1, #85 ; 0x55\n", + " a: f1a1 0055 sub.w r0, r1, #85 ; 0x55\n", " e: f001 0055 and.w r0, r1, #85 ; 0x55\n", " 12: f041 0055 orr.w r0, r1, #85 ; 0x55\n", " 16: f061 0055 orn r0, r1, #85 ; 0x55\n", @@ -201,6 +201,24 @@ const char* DataProcessingShiftedRegisterResults[] = { " 32: ea5f 0834 movs.w r8, r4, rrx\n", nullptr }; +const char* ShiftImmediateResults[] = { + " 0: 0123 lsls r3, r4, #4\n", + " 2: 0963 lsrs r3, r4, #5\n", + " 4: 11a3 asrs r3, r4, #6\n", + " 6: ea4f 13f4 mov.w r3, r4, ror #7\n", + " a: ea4f 0334 mov.w r3, r4, rrx\n", + " e: ea4f 1304 mov.w r3, r4, lsl #4\n", + " 12: ea4f 1354 mov.w r3, r4, lsr #5\n", + " 16: ea4f 13a4 mov.w r3, r4, asr #6\n", + " 1a: ea4f 13f4 mov.w r3, r4, ror #7\n", + " 1e: ea4f 0334 mov.w r3, r4, rrx\n", + " 22: ea5f 1804 movs.w r8, r4, lsl #4\n", + " 26: ea5f 1854 movs.w r8, r4, lsr #5\n", + " 2a: ea5f 18a4 movs.w r8, r4, asr #6\n", + " 2e: ea5f 18f4 movs.w r8, r4, ror #7\n", + " 32: ea5f 0834 movs.w r8, r4, rrx\n", + nullptr +}; const char* BasicLoadResults[] = { " 0: 69a3 ldr r3, [r4, #24]\n", " 2: 7e23 ldrb r3, [r4, #24]\n", @@ -434,23 +452,115 @@ const char* MovWMovTResults[] = { const char* SpecialAddSubResults[] = { " 0: aa14 add r2, sp, 
#80 ; 0x50\n", " 2: b014 add sp, #80 ; 0x50\n", - " 4: f20d 0850 addw r8, sp, #80 ; 0x50\n", - " 8: f60d 7200 addw r2, sp, #3840 ; 0xf00\n", - " c: f60d 7d00 addw sp, sp, #3840 ; 0xf00\n", - " 10: b094 sub sp, #80 ; 0x50\n", - " 12: f2ad 0050 subw r0, sp, #80 ; 0x50\n", - " 16: f2ad 0850 subw r8, sp, #80 ; 0x50\n", - " 1a: f6ad 7d00 subw sp, sp, #3840 ; 0xf00\n", + " 4: f10d 0850 add.w r8, sp, #80 ; 0x50\n", + " 8: f50d 6270 add.w r2, sp, #3840 ; 0xf00\n", + " c: f50d 6d70 add.w sp, sp, #3840 ; 0xf00\n", + " 10: f60d 7dfc addw sp, sp, #4092 ; 0xffc\n", + " 14: b094 sub sp, #80 ; 0x50\n", + " 16: f1ad 0050 sub.w r0, sp, #80 ; 0x50\n", + " 1a: f1ad 0850 sub.w r8, sp, #80 ; 0x50\n", + " 1e: f5ad 6d70 sub.w sp, sp, #3840 ; 0xf00\n", + " 22: f6ad 7dfc subw sp, sp, #4092 ; 0xffc\n", + nullptr +}; +const char* LoadFromOffsetResults[] = { + " 0: 68e2 ldr r2, [r4, #12]\n", + " 2: f8d4 2fff ldr.w r2, [r4, #4095] ; 0xfff\n", + " 6: f504 5280 add.w r2, r4, #4096 ; 0x1000\n", + " a: 6812 ldr r2, [r2, #0]\n", + " c: f504 1280 add.w r2, r4, #1048576 ; 0x100000\n", + " 10: f8d2 20a4 ldr.w r2, [r2, #164] ; 0xa4\n", + " 14: f241 0200 movw r2, #4096 ; 0x1000\n", + " 18: f2c0 0210 movt r2, #16\n", + " 1c: 4422 add r2, r4\n", + " 1e: 6812 ldr r2, [r2, #0]\n", + " 20: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 24: f2c0 0c10 movt ip, #16\n", + " 28: 4464 add r4, ip\n", + " 2a: 6824 ldr r4, [r4, #0]\n", + " 2c: 89a2 ldrh r2, [r4, #12]\n", + " 2e: f8b4 2fff ldrh.w r2, [r4, #4095] ; 0xfff\n", + " 32: f504 5280 add.w r2, r4, #4096 ; 0x1000\n", + " 36: 8812 ldrh r2, [r2, #0]\n", + " 38: f504 1280 add.w r2, r4, #1048576 ; 0x100000\n", + " 3c: f8b2 20a4 ldrh.w r2, [r2, #164] ; 0xa4\n", + " 40: f241 0200 movw r2, #4096 ; 0x1000\n", + " 44: f2c0 0210 movt r2, #16\n", + " 48: 4422 add r2, r4\n", + " 4a: 8812 ldrh r2, [r2, #0]\n", + " 4c: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 50: f2c0 0c10 movt ip, #16\n", + " 54: 4464 add r4, ip\n", + " 56: 8824 ldrh r4, [r4, #0]\n", + " 58: e9d4 2303 ldrd r2, 
r3, [r4, #12]\n", + " 5c: e9d4 23ff ldrd r2, r3, [r4, #1020] ; 0x3fc\n", + " 60: f504 6280 add.w r2, r4, #1024 ; 0x400\n", + " 64: e9d2 2300 ldrd r2, r3, [r2]\n", + " 68: f504 2280 add.w r2, r4, #262144 ; 0x40000\n", + " 6c: e9d2 2329 ldrd r2, r3, [r2, #164]; 0xa4\n", + " 70: f240 4200 movw r2, #1024 ; 0x400\n", + " 74: f2c0 0204 movt r2, #4\n", + " 78: 4422 add r2, r4\n", + " 7a: e9d2 2300 ldrd r2, r3, [r2]\n", + " 7e: f240 4c00 movw ip, #1024 ; 0x400\n", + " 82: f2c0 0c04 movt ip, #4\n", + " 86: 4464 add r4, ip\n", + " 88: e9d4 4500 ldrd r4, r5, [r4]\n", + " 8c: f8dc 000c ldr.w r0, [ip, #12]\n", + " 90: f5a4 1280 sub.w r2, r4, #1048576 ; 0x100000\n", + " 94: f8d2 20a4 ldr.w r2, [r2, #164] ; 0xa4\n", + " 98: f994 200c ldrsb.w r2, [r4, #12]\n", + " 9c: 7b22 ldrb r2, [r4, #12]\n", + " 9e: f9b4 200c ldrsh.w r2, [r4, #12]\n", nullptr }; const char* StoreToOffsetResults[] = { " 0: 60e2 str r2, [r4, #12]\n", - " 2: f44f 5c00 mov.w ip, #8192 ; 0x2000\n", - " 6: 44a4 add ip, r4\n", - " 8: f8cc 2000 str.w r2, [ip]\n", - " c: f8cc 000c str.w r0, [ip, #12]\n", - " 10: f8ac 000c strh.w r0, [ip, #12]\n", - " 14: f88c 200c strb.w r2, [ip, #12]\n", + " 2: f8c4 2fff str.w r2, [r4, #4095] ; 0xfff\n", + " 6: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n", + " a: f8cc 2000 str.w r2, [ip]\n", + " e: f504 1c80 add.w ip, r4, #1048576 ; 0x100000\n", + " 12: f8cc 20a4 str.w r2, [ip, #164] ; 0xa4\n", + " 16: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 1a: f2c0 0c10 movt ip, #16\n", + " 1e: 44a4 add ip, r4\n", + " 20: f8cc 2000 str.w r2, [ip]\n", + " 24: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 28: f2c0 0c10 movt ip, #16\n", + " 2c: 44a4 add ip, r4\n", + " 2e: f8cc 4000 str.w r4, [ip]\n", + " 32: 81a2 strh r2, [r4, #12]\n", + " 34: f8a4 2fff strh.w r2, [r4, #4095] ; 0xfff\n", + " 38: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n", + " 3c: f8ac 2000 strh.w r2, [ip]\n", + " 40: f504 1c80 add.w ip, r4, #1048576 ; 0x100000\n", + " 44: f8ac 20a4 strh.w r2, [ip, #164] ; 0xa4\n", + " 48: f241 0c00 movw ip, 
#4096 ; 0x1000\n", + " 4c: f2c0 0c10 movt ip, #16\n", + " 50: 44a4 add ip, r4\n", + " 52: f8ac 2000 strh.w r2, [ip]\n", + " 56: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 5a: f2c0 0c10 movt ip, #16\n", + " 5e: 44a4 add ip, r4\n", + " 60: f8ac 4000 strh.w r4, [ip]\n", + " 64: e9c4 2303 strd r2, r3, [r4, #12]\n", + " 68: e9c4 23ff strd r2, r3, [r4, #1020] ; 0x3fc\n", + " 6c: f504 6c80 add.w ip, r4, #1024 ; 0x400\n", + " 70: e9cc 2300 strd r2, r3, [ip]\n", + " 74: f504 2c80 add.w ip, r4, #262144 ; 0x40000\n", + " 78: e9cc 2329 strd r2, r3, [ip, #164]; 0xa4\n", + " 7c: f240 4c00 movw ip, #1024 ; 0x400\n", + " 80: f2c0 0c04 movt ip, #4\n", + " 84: 44a4 add ip, r4\n", + " 86: e9cc 2300 strd r2, r3, [ip]\n", + " 8a: f240 4c00 movw ip, #1024 ; 0x400\n", + " 8e: f2c0 0c04 movt ip, #4\n", + " 92: 44a4 add ip, r4\n", + " 94: e9cc 4500 strd r4, r5, [ip]\n", + " 98: f8cc 000c str.w r0, [ip, #12]\n", + " 9c: f5a4 1c80 sub.w ip, r4, #1048576 ; 0x100000\n", + " a0: f8cc 20a4 str.w r2, [ip, #164] ; 0xa4\n", + " a4: 7322 strb r2, [r4, #12]\n", nullptr }; const char* IfThenResults[] = { @@ -4952,6 +5062,7 @@ void setup_results() { test_results["DataProcessingModifiedImmediate"] = DataProcessingModifiedImmediateResults; test_results["DataProcessingModifiedImmediates"] = DataProcessingModifiedImmediatesResults; test_results["DataProcessingShiftedRegister"] = DataProcessingShiftedRegisterResults; + test_results["ShiftImmediate"] = ShiftImmediateResults; test_results["BasicLoad"] = BasicLoadResults; test_results["BasicStore"] = BasicStoreResults; test_results["ComplexLoad"] = ComplexLoadResults; @@ -4966,6 +5077,7 @@ void setup_results() { test_results["StoreMultiple"] = StoreMultipleResults; test_results["MovWMovT"] = MovWMovTResults; test_results["SpecialAddSub"] = SpecialAddSubResults; + test_results["LoadFromOffset"] = LoadFromOffsetResults; test_results["StoreToOffset"] = StoreToOffsetResults; test_results["IfThen"] = IfThenResults; test_results["CbzCbnz"] = CbzCbnzResults; diff 
--git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index aee64120a8..fc7ac7061a 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -310,15 +310,27 @@ void MipsAssembler::Seh(Register rd, Register rt) { EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20); } +void MipsAssembler::Wsbh(Register rd, Register rt) { + EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20); +} + void MipsAssembler::Sll(Register rd, Register rt, int shamt) { + CHECK(IsUint<5>(shamt)) << shamt; EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00); } void MipsAssembler::Srl(Register rd, Register rt, int shamt) { + CHECK(IsUint<5>(shamt)) << shamt; EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02); } +void MipsAssembler::Rotr(Register rd, Register rt, int shamt) { + CHECK(IsUint<5>(shamt)) << shamt; + EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02); +} + void MipsAssembler::Sra(Register rd, Register rt, int shamt) { + CHECK(IsUint<5>(shamt)) << shamt; EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03); } diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 4038c1f1c4..1ef0992dac 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -135,9 +135,11 @@ class MipsAssembler FINAL : public Assembler { void Seb(Register rd, Register rt); // R2+ void Seh(Register rd, Register rt); // R2+ + void Wsbh(Register rd, Register rt); // R2+ void Sll(Register rd, Register rt, int shamt); void Srl(Register rd, Register rt, int shamt); + void Rotr(Register rd, Register rt, int shamt); // R2+ void Sra(Register rd, Register rt, int shamt); void Sllv(Register rd, Register rt, Register rs); void Srlv(Register rd, Register rt, Register rs); diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index ba2525e555..107d5bb572 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ 
b/compiler/utils/mips64/assembler_mips64.cc @@ -19,15 +19,73 @@ #include "base/bit_utils.h" #include "base/casts.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" #include "memory_region.h" #include "thread.h" namespace art { namespace mips64 { +void Mips64Assembler::FinalizeCode() { + for (auto& exception_block : exception_blocks_) { + EmitExceptionPoll(&exception_block); + } + PromoteBranches(); +} + +void Mips64Assembler::FinalizeInstructions(const MemoryRegion& region) { + EmitBranches(); + Assembler::FinalizeInstructions(region); + PatchCFI(); +} + +void Mips64Assembler::PatchCFI() { + if (cfi().NumberOfDelayedAdvancePCs() == 0u) { + return; + } + + typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; + const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); + const std::vector<uint8_t>& old_stream = data.first; + const std::vector<DelayedAdvancePC>& advances = data.second; + + // Refill our data buffer with patched opcodes. + cfi().ReserveCFIStream(old_stream.size() + advances.size() + 16); + size_t stream_pos = 0; + for (const DelayedAdvancePC& advance : advances) { + DCHECK_GE(advance.stream_pos, stream_pos); + // Copy old data up to the point where advance was issued. + cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos); + stream_pos = advance.stream_pos; + // Insert the advance command with its final offset. + size_t final_pc = GetAdjustedPosition(advance.pc); + cfi().AdvancePC(final_pc); + } + // Copy the final segment if any. + cfi().AppendRawData(old_stream, stream_pos, old_stream.size()); +} + +void Mips64Assembler::EmitBranches() { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (branch placeholders) in the buffer. 
+ overwriting_ = true; + for (auto& branch : branches_) { + EmitBranch(&branch); + } + overwriting_ = false; +} + void Mips64Assembler::Emit(uint32_t value) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - buffer_.Emit<uint32_t>(value); + if (overwriting_) { + // Branches to labels are emitted into their placeholders here. + buffer_.Store<uint32_t>(overwrite_location_, value); + overwrite_location_ += sizeof(uint32_t); + } else { + // Other instructions are simply appended at the end here. + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + buffer_.Emit<uint32_t>(value); + } } void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, @@ -82,15 +140,16 @@ void Mips64Assembler::EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t void Mips64Assembler::EmitI21(int opcode, GpuRegister rs, uint32_t imm21) { CHECK_NE(rs, kNoGpuRegister); + CHECK(IsUint<21>(imm21)) << imm21; uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | static_cast<uint32_t>(rs) << kRsShift | - (imm21 & 0x1FFFFF); + imm21; Emit(encoding); } -void Mips64Assembler::EmitJ(int opcode, uint32_t addr26) { - uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | - (addr26 & 0x3FFFFFF); +void Mips64Assembler::EmitI26(int opcode, uint32_t imm26) { + CHECK(IsUint<26>(imm26)) << imm26; + uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26; Emit(encoding); } @@ -428,26 +487,6 @@ void Mips64Assembler::Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0xb, rs, rt, imm16); } -void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) { - EmitI(0x4, rs, rt, imm16); - Nop(); -} - -void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) { - EmitI(0x5, rs, rt, imm16); - Nop(); -} - -void Mips64Assembler::J(uint32_t addr26) { - EmitJ(0x2, addr26); - Nop(); -} - -void Mips64Assembler::Jal(uint32_t addr26) { - EmitJ(0x3, addr26); - Nop(); -} - void 
Mips64Assembler::Seleqz(GpuRegister rd, GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, rd, 0, 0x35); } @@ -474,7 +513,6 @@ void Mips64Assembler::Dclo(GpuRegister rd, GpuRegister rs) { void Mips64Assembler::Jalr(GpuRegister rd, GpuRegister rs) { EmitR(0, rs, static_cast<GpuRegister>(0), rd, 0, 0x09); - Nop(); } void Mips64Assembler::Jalr(GpuRegister rs) { @@ -489,6 +527,15 @@ void Mips64Assembler::Auipc(GpuRegister rs, uint16_t imm16) { EmitI(0x3B, rs, static_cast<GpuRegister>(0x1E), imm16); } +void Mips64Assembler::Addiupc(GpuRegister rs, uint32_t imm19) { + CHECK(IsUint<19>(imm19)) << imm19; + EmitI21(0x3B, rs, imm19); +} + +void Mips64Assembler::Bc(uint32_t imm26) { + EmitI26(0x32, imm26); +} + void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) { EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16); } @@ -549,14 +596,14 @@ void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x8, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16); + EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16); } void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x18, (rs < rt) ? rs : rt, (rs < rt) ? 
rt : rs, imm16); + EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16); } void Mips64Assembler::Beqzc(GpuRegister rs, uint32_t imm21) { @@ -569,6 +616,65 @@ void Mips64Assembler::Bnezc(GpuRegister rs, uint32_t imm21) { EmitI21(0x3E, rs, imm21); } +void Mips64Assembler::EmitBcondc(BranchCondition cond, + GpuRegister rs, + GpuRegister rt, + uint32_t imm16_21) { + switch (cond) { + case kCondLT: + Bltc(rs, rt, imm16_21); + break; + case kCondGE: + Bgec(rs, rt, imm16_21); + break; + case kCondLE: + Bgec(rt, rs, imm16_21); + break; + case kCondGT: + Bltc(rt, rs, imm16_21); + break; + case kCondLTZ: + CHECK_EQ(rt, ZERO); + Bltzc(rs, imm16_21); + break; + case kCondGEZ: + CHECK_EQ(rt, ZERO); + Bgezc(rs, imm16_21); + break; + case kCondLEZ: + CHECK_EQ(rt, ZERO); + Blezc(rs, imm16_21); + break; + case kCondGTZ: + CHECK_EQ(rt, ZERO); + Bgtzc(rs, imm16_21); + break; + case kCondEQ: + Beqc(rs, rt, imm16_21); + break; + case kCondNE: + Bnec(rs, rt, imm16_21); + break; + case kCondEQZ: + CHECK_EQ(rt, ZERO); + Beqzc(rs, imm16_21); + break; + case kCondNEZ: + CHECK_EQ(rt, ZERO); + Bnezc(rs, imm16_21); + break; + case kCondLTU: + Bltuc(rs, rt, imm16_21); + break; + case kCondGEU: + Bgeuc(rs, rt, imm16_21); + break; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << cond; + UNREACHABLE(); + } +} + void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { EmitFR(0x11, 0x10, ft, fs, fd, 0x0); } @@ -925,15 +1031,6 @@ void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) { } } -void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp) { - if (IsInt<16>(value)) { - Addiu(rt, rs, value); - } else { - LoadConst32(rtmp, value); - Addu(rt, rs, rtmp); - } -} - void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) { if (IsInt<16>(value)) { Daddiu(rt, rs, value); @@ -943,177 +1040,621 @@ void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, Gp } 
} -// -// MIPS64R6 branches -// -// -// Unconditional (pc + 32-bit signed offset): -// -// auipc at, ofs_high -// jic at, ofs_low -// // no delay/forbidden slot -// -// -// Conditional (pc + 32-bit signed offset): -// -// b<cond>c reg, +2 // skip next 2 instructions -// auipc at, ofs_high -// jic at, ofs_low -// // no delay/forbidden slot -// -// -// Unconditional (pc + 32-bit signed offset) and link: -// -// auipc reg, ofs_high -// daddiu reg, ofs_low -// jialc reg, 0 -// // no delay/forbidden slot -// -// -// TODO: use shorter instruction sequences whenever possible. -// - -void Mips64Assembler::Bind(Label* label) { - CHECK(!label->IsBound()); - int32_t bound_pc = buffer_.Size(); +void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBits offset_size, + Mips64Assembler::Branch::Type short_type, + Mips64Assembler::Branch::Type long_type) { + type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type; +} - // Walk the list of the branches (auipc + jic pairs) referring to and preceding this label. - // Embed the previously unknown pc-relative addresses in them. - while (label->IsLinked()) { - int32_t position = label->Position(); - // Extract the branch (instruction pair) - uint32_t auipc = buffer_.Load<uint32_t>(position); - uint32_t jic = buffer_.Load<uint32_t>(position + 4); // actually, jic or daddiu +void Mips64Assembler::Branch::InitializeType(bool is_call) { + OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); + if (is_call) { + InitShortOrLong(offset_size, kCall, kLongCall); + } else if (condition_ == kUncond) { + InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + } else { + if (condition_ == kCondEQZ || condition_ == kCondNEZ) { + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size <= kOffset23) ? 
kCondBranch : kLongCondBranch; + } else { + InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + } + } + old_type_ = type_; +} + +bool Mips64Assembler::Branch::IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs) { + switch (condition) { + case kCondLT: + case kCondGT: + case kCondNE: + case kCondLTU: + return lhs == rhs; + default: + return false; + } +} + +bool Mips64Assembler::Branch::IsUncond(BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs) { + switch (condition) { + case kUncond: + return true; + case kCondGE: + case kCondLE: + case kCondEQ: + case kCondGEU: + return lhs == rhs; + default: + return false; + } +} + +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(ZERO), + rhs_reg_(ZERO), + condition_(kUncond) { + InitializeType(false); +} + +Mips64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + Mips64Assembler::BranchCondition condition, + GpuRegister lhs_reg, + GpuRegister rhs_reg) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(lhs_reg), + rhs_reg_(rhs_reg), + condition_(condition) { + CHECK_NE(condition, kUncond); + switch (condition) { + case kCondEQ: + case kCondNE: + case kCondLT: + case kCondGE: + case kCondLE: + case kCondGT: + case kCondLTU: + case kCondGEU: + CHECK_NE(lhs_reg, ZERO); + CHECK_NE(rhs_reg, ZERO); + break; + case kCondLTZ: + case kCondGEZ: + case kCondLEZ: + case kCondGTZ: + case kCondEQZ: + case kCondNEZ: + CHECK_NE(lhs_reg, ZERO); + CHECK_EQ(rhs_reg, ZERO); + break; + case kUncond: + UNREACHABLE(); + } + CHECK(!IsNop(condition, lhs_reg, rhs_reg)); + if (IsUncond(condition, lhs_reg, rhs_reg)) { + // Branch condition is always true, make the branch unconditional. 
+ condition_ = kUncond; + } + InitializeType(false); +} + +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(indirect_reg), + rhs_reg_(ZERO), + condition_(kUncond) { + CHECK_NE(indirect_reg, ZERO); + CHECK_NE(indirect_reg, AT); + InitializeType(true); +} + +Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition( + Mips64Assembler::BranchCondition cond) { + switch (cond) { + case kCondLT: + return kCondGE; + case kCondGE: + return kCondLT; + case kCondLE: + return kCondGT; + case kCondGT: + return kCondLE; + case kCondLTZ: + return kCondGEZ; + case kCondGEZ: + return kCondLTZ; + case kCondLEZ: + return kCondGTZ; + case kCondGTZ: + return kCondLEZ; + case kCondEQ: + return kCondNE; + case kCondNE: + return kCondEQ; + case kCondEQZ: + return kCondNEZ; + case kCondNEZ: + return kCondEQZ; + case kCondLTU: + return kCondGEU; + case kCondGEU: + return kCondLTU; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << cond; + } + UNREACHABLE(); +} + +Mips64Assembler::Branch::Type Mips64Assembler::Branch::GetType() const { + return type_; +} + +Mips64Assembler::BranchCondition Mips64Assembler::Branch::GetCondition() const { + return condition_; +} + +GpuRegister Mips64Assembler::Branch::GetLeftRegister() const { + return lhs_reg_; +} + +GpuRegister Mips64Assembler::Branch::GetRightRegister() const { + return rhs_reg_; +} + +uint32_t Mips64Assembler::Branch::GetTarget() const { + return target_; +} - // Extract the location of the previous pair in the list (walking the list backwards; - // the previous pair location was stored in the immediate operands of the instructions) - int32_t prev = (auipc << 16) | (jic & 0xFFFF); +uint32_t Mips64Assembler::Branch::GetLocation() const { + return location_; +} + +uint32_t Mips64Assembler::Branch::GetOldLocation() const { + return old_location_; +} + +uint32_t 
Mips64Assembler::Branch::GetLength() const { + return branch_info_[type_].length; +} + +uint32_t Mips64Assembler::Branch::GetOldLength() const { + return branch_info_[old_type_].length; +} + +uint32_t Mips64Assembler::Branch::GetSize() const { + return GetLength() * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetOldSize() const { + return GetOldLength() * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetEndLocation() const { + return GetLocation() + GetSize(); +} + +uint32_t Mips64Assembler::Branch::GetOldEndLocation() const { + return GetOldLocation() + GetOldSize(); +} + +bool Mips64Assembler::Branch::IsLong() const { + switch (type_) { + // Short branches. + case kUncondBranch: + case kCondBranch: + case kCall: + return false; + // Long branches. + case kLongUncondBranch: + case kLongCondBranch: + case kLongCall: + return true; + } + UNREACHABLE(); +} + +bool Mips64Assembler::Branch::IsResolved() const { + return target_ != kUnresolved; +} + +Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSize() const { + OffsetBits offset_size = + (type_ == kCondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ)) + ? kOffset23 + : branch_info_[type_].offset_size; + return offset_size; +} + +Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSizeNeeded(uint32_t location, + uint32_t target) { + // For unresolved targets assume the shortest encoding + // (later it will be made longer if needed). + if (target == kUnresolved) + return kOffset16; + int64_t distance = static_cast<int64_t>(target) - location; + // To simplify calculations in composite branches consisting of multiple instructions + // bump up the distance by a value larger than the max byte size of a composite branch. + distance += (distance >= 0) ? 
kMaxBranchSize : -kMaxBranchSize; + if (IsInt<kOffset16>(distance)) + return kOffset16; + else if (IsInt<kOffset18>(distance)) + return kOffset18; + else if (IsInt<kOffset21>(distance)) + return kOffset21; + else if (IsInt<kOffset23>(distance)) + return kOffset23; + else if (IsInt<kOffset28>(distance)) + return kOffset28; + return kOffset32; +} + +void Mips64Assembler::Branch::Resolve(uint32_t target) { + target_ = target; +} + +void Mips64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) { + if (location_ > expand_location) { + location_ += delta; + } + if (!IsResolved()) { + return; // Don't know the target yet. + } + if (target_ > expand_location) { + target_ += delta; + } +} + +void Mips64Assembler::Branch::PromoteToLong() { + switch (type_) { + // Short branches. + case kUncondBranch: + type_ = kLongUncondBranch; + break; + case kCondBranch: + type_ = kLongCondBranch; + break; + case kCall: + type_ = kLongCall; + break; + default: + // Note: 'type_' is already long. + break; + } + CHECK(IsLong()); +} + +uint32_t Mips64Assembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { + // If the branch is still unresolved or already long, nothing to do. + if (IsLong() || !IsResolved()) { + return 0; + } + // Promote the short branch to long if the offset size is too small + // to hold the distance between location_ and target_. + if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) { + PromoteToLong(); + uint32_t old_size = GetOldSize(); + uint32_t new_size = GetSize(); + CHECK_GT(new_size, old_size); + return new_size - old_size; + } + // The following logic is for debugging/testing purposes. + // Promote some short branches to long when it's not really required. + if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) { + int64_t distance = static_cast<int64_t>(target_) - location_; + distance = (distance >= 0) ? 
distance : -distance; + if (distance >= max_short_distance) { + PromoteToLong(); + uint32_t old_size = GetOldSize(); + uint32_t new_size = GetSize(); + CHECK_GT(new_size, old_size); + return new_size - old_size; + } + } + return 0; +} + +uint32_t Mips64Assembler::Branch::GetOffsetLocation() const { + return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetOffset() const { + CHECK(IsResolved()); + uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize()); + // Calculate the byte distance between instructions and also account for + // different PC-relative origins. + uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t); + // Prepare the offset for encoding into the instruction(s). + offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift; + return offset; +} - // Get the pc-relative address - uint32_t offset = bound_pc - position; - offset += (offset & 0x8000) << 1; // account for sign extension in jic/daddiu +Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +const Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) const { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +void Mips64Assembler::Bind(Mips64Label* label) { + CHECK(!label->IsBound()); + uint32_t bound_pc = buffer_.Size(); - // Embed it in the two instructions - auipc = (auipc & 0xFFFF0000) | (offset >> 16); - jic = (jic & 0xFFFF0000) | (offset & 0xFFFF); + // Walk the list of branches referring to and preceding this label. + // Store the previously unknown target addresses in them. 
+ while (label->IsLinked()) { + uint32_t branch_id = label->Position(); + Branch* branch = GetBranch(branch_id); + branch->Resolve(bound_pc); - // Save the adjusted instructions - buffer_.Store<uint32_t>(position, auipc); - buffer_.Store<uint32_t>(position + 4, jic); + uint32_t branch_location = branch->GetLocation(); + // Extract the location of the previous branch in the list (walking the list backwards; + // the previous branch ID was stored in the space reserved for this branch). + uint32_t prev = buffer_.Load<uint32_t>(branch_location); // On to the previous branch in the list... label->position_ = prev; } - // Now make the label object contain its own location - // (it will be used by the branches referring to and following this label) + // Now make the label object contain its own location (relative to the end of the preceding + // branch, if any; it will be used by the branches referring to and following this label). + label->prev_branch_id_plus_one_ = branches_.size(); + if (label->prev_branch_id_plus_one_) { + uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; + const Branch* branch = GetBranch(branch_id); + bound_pc -= branch->GetEndLocation(); + } label->BindTo(bound_pc); } -void Mips64Assembler::B(Label* label) { - if (label->IsBound()) { - // Branch backwards (to a preceding label), distance is known - uint32_t offset = label->Position() - buffer_.Size(); - CHECK_LE(static_cast<int32_t>(offset), 0); - offset += (offset & 0x8000) << 1; // account for sign extension in jic - Auipc(AT, offset >> 16); - Jic(AT, offset); - } else { - // Branch forward (to a following label), distance is unknown - int32_t position = buffer_.Size(); - // The first branch forward will have 0 in its pc-relative address (copied from label's - // position). It will be the terminator of the list of forward-reaching branches. 
- uint32_t prev = label->position_; - Auipc(AT, prev >> 16); - Jic(AT, prev); - // Now make the link object point to the location of this branch - // (this forms a linked list of branches preceding this label) - label->LinkTo(position); +uint32_t Mips64Assembler::GetLabelLocation(Mips64Label* label) const { + CHECK(label->IsBound()); + uint32_t target = label->Position(); + if (label->prev_branch_id_plus_one_) { + // Get label location based on the branch preceding it. + uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; + const Branch* branch = GetBranch(branch_id); + target += branch->GetEndLocation(); + } + return target; +} + +uint32_t Mips64Assembler::GetAdjustedPosition(uint32_t old_position) { + // We can reconstruct the adjustment by going through all the branches from the beginning + // up to the old_position. Since we expect AdjustedPosition() to be called in a loop + // with increasing old_position, we can use the data from last AdjustedPosition() to + // continue where we left off and the whole loop should be O(m+n) where m is the number + // of positions to adjust and n is the number of branches. + if (old_position < last_old_position_) { + last_position_adjustment_ = 0; + last_old_position_ = 0; + last_branch_id_ = 0; + } + while (last_branch_id_ != branches_.size()) { + const Branch* branch = GetBranch(last_branch_id_); + if (branch->GetLocation() >= old_position + last_position_adjustment_) { + break; + } + last_position_adjustment_ += branch->GetSize() - branch->GetOldSize(); + ++last_branch_id_; + } + last_old_position_ = old_position; + return old_position + last_position_adjustment_; +} + +void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) { + uint32_t length = branches_.back().GetLength(); + if (!label->IsBound()) { + // Branch forward (to a following label), distance is unknown. + // The first branch forward will contain 0, serving as the terminator of + // the list of forward-reaching branches. 
+ Emit(label->position_); + length--; + // Now make the label object point to this branch + // (this forms a linked list of branches preceding this label). + uint32_t branch_id = branches_.size() - 1; + label->LinkTo(branch_id); + } + // Reserve space for the branch. + while (length--) { + Nop(); } } -void Mips64Assembler::Jalr(Label* label, GpuRegister indirect_reg) { - if (label->IsBound()) { - // Branch backwards (to a preceding label), distance is known - uint32_t offset = label->Position() - buffer_.Size(); - CHECK_LE(static_cast<int32_t>(offset), 0); - offset += (offset & 0x8000) << 1; // account for sign extension in daddiu - Auipc(indirect_reg, offset >> 16); - Daddiu(indirect_reg, indirect_reg, offset); - Jialc(indirect_reg, 0); - } else { - // Branch forward (to a following label), distance is unknown - int32_t position = buffer_.Size(); - // The first branch forward will have 0 in its pc-relative address (copied from label's - // position). It will be the terminator of the list of forward-reaching branches. - uint32_t prev = label->position_; - Auipc(indirect_reg, prev >> 16); - Daddiu(indirect_reg, indirect_reg, prev); - Jialc(indirect_reg, 0); - // Now make the link object point to the location of this branch - // (this forms a linked list of branches preceding this label) - label->LinkTo(position); +void Mips64Assembler::Buncond(Mips64Label* label) { + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::Bcond(Mips64Label* label, + BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs) { + // If lhs = rhs, this can be a NOP. + if (Branch::IsNop(condition, lhs, rhs)) { + return; + } + uint32_t target = label->IsBound() ? 
GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::Call(Mips64Label* label, GpuRegister indirect_reg) { + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, indirect_reg); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::PromoteBranches() { + // Promote short branches to long as necessary. + bool changed; + do { + changed = false; + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + uint32_t delta = branch.PromoteIfNeeded(); + // If this branch has been promoted and needs to expand in size, + // relocate all branches by the expansion size. + if (delta) { + changed = true; + uint32_t expand_location = branch.GetLocation(); + for (auto& branch2 : branches_) { + branch2.Relocate(expand_location, delta); + } + } + } + } while (changed); + + // Account for branch expansion by resizing the code buffer + // and moving the code in it to its final location. + size_t branch_count = branches_.size(); + if (branch_count > 0) { + // Resize. + Branch& last_branch = branches_[branch_count - 1]; + uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation(); + uint32_t old_size = buffer_.Size(); + buffer_.Resize(old_size + size_delta); + // Move the code residing between branch placeholders. + uint32_t end = old_size; + for (size_t i = branch_count; i > 0; ) { + Branch& branch = branches_[--i]; + uint32_t size = end - branch.GetOldEndLocation(); + buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size); + end = branch.GetOldLocation(); + } + } +} + +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. +const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[] = { + // Short branches. 
+ { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kUncondBranch + { 2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kCondBranch + // Exception: kOffset23 for beqzc/bnezc + { 2, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kCall + // Long branches. + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongUncondBranch + { 3, 1, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCondBranch + { 3, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCall +}; + +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. +void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { + CHECK(overwriting_); + overwrite_location_ = branch->GetLocation(); + uint32_t offset = branch->GetOffset(); + BranchCondition condition = branch->GetCondition(); + GpuRegister lhs = branch->GetLeftRegister(); + GpuRegister rhs = branch->GetRightRegister(); + switch (branch->GetType()) { + // Short branches. + case Branch::kUncondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Bc(offset); + break; + case Branch::kCondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcondc(condition, lhs, rhs, offset); + Nop(); // TODO: improve by filling the forbidden slot. + break; + case Branch::kCall: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Addiupc(lhs, offset); + Jialc(lhs, 0); + break; + + // Long branches. + case Branch::kLongUncondBranch: + offset += (offset & 0x8000) << 1; // Account for sign extension in jic. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jic(AT, Low16Bits(offset)); + break; + case Branch::kLongCondBranch: + EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2); + offset += (offset & 0x8000) << 1; // Account for sign extension in jic. 
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jic(AT, Low16Bits(offset)); + break; + case Branch::kLongCall: + offset += (offset & 0x8000) << 1; // Account for sign extension in daddiu. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(lhs, High16Bits(offset)); + Daddiu(lhs, lhs, Low16Bits(offset)); + Jialc(lhs, 0); + break; } + CHECK_EQ(overwrite_location_, branch->GetEndLocation()); + CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize)); } -void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Label* label) { - Bgec(rs, rt, 2); - B(label); +void Mips64Assembler::Bc(Mips64Label* label) { + Buncond(label); } -void Mips64Assembler::Bltzc(GpuRegister rt, Label* label) { - Bgezc(rt, 2); - B(label); +void Mips64Assembler::Jialc(Mips64Label* label, GpuRegister indirect_reg) { + Call(label, indirect_reg); } -void Mips64Assembler::Bgtzc(GpuRegister rt, Label* label) { - Blezc(rt, 2); - B(label); +void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLT, rs, rt); } -void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Label* label) { - Bltc(rs, rt, 2); - B(label); +void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLTZ, rt); } -void Mips64Assembler::Bgezc(GpuRegister rt, Label* label) { - Bltzc(rt, 2); - B(label); +void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGTZ, rt); } -void Mips64Assembler::Blezc(GpuRegister rt, Label* label) { - Bgtzc(rt, 2); - B(label); +void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGE, rs, rt); } -void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Label* label) { - Bgeuc(rs, rt, 2); - B(label); +void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGEZ, rt); } -void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Label* label) 
{ - Bltuc(rs, rt, 2); - B(label); +void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLEZ, rt); } -void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Label* label) { - Bnec(rs, rt, 2); - B(label); +void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLTU, rs, rt); } -void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Label* label) { - Beqc(rs, rt, 2); - B(label); +void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGEU, rs, rt); } -void Mips64Assembler::Beqzc(GpuRegister rs, Label* label) { - Bnezc(rs, 2); - B(label); +void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondEQ, rs, rt); } -void Mips64Assembler::Bnezc(GpuRegister rs, Label* label) { - Beqzc(rs, 2); - B(label); +void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondNE, rs, rt); +} + +void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label) { + Bcond(label, kCondEQZ, rs); +} + +void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) { + Bcond(label, kCondNEZ, rs); } void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, @@ -1256,6 +1797,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); // Increase frame to required size. 
IncreaseFrameSize(frame_size); @@ -1298,6 +1840,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Mips64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); cfi_.RememberState(); // Pop callee saves and return address @@ -1316,6 +1859,7 @@ void Mips64Assembler::RemoveFrame(size_t frame_size, // Then jump to the return address. Jr(RA); + Nop(); // The CFI should be restored for any code that follows the exit block. cfi_.RestoreState(); @@ -1324,12 +1868,14 @@ void Mips64Assembler::RemoveFrame(size_t frame_size, void Mips64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); + DCHECK(!overwriting_); Daddiu64(SP, SP, static_cast<int32_t>(-adjust)); cfi_.AdjustCFAOffset(adjust); } void Mips64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); + DCHECK(!overwriting_); Daddiu64(SP, SP, static_cast<int32_t>(adjust)); cfi_.AdjustCFAOffset(-adjust); } @@ -1379,17 +1925,7 @@ void Mips64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value()); } -void Mips64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister mscratch) { - Mips64ManagedRegister scratch = mscratch.AsMips64(); - CHECK(scratch.IsGpuRegister()) << scratch; - // TODO: it's unclear wether 32 or 64 bits need to be stored (Arm64 and x86/x64 disagree?). - // Is this function even referenced anywhere else in the code? 
- LoadConst32(scratch.AsGpuRegister(), imm); - StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value()); -} - -void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, +void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); @@ -1398,7 +1934,7 @@ void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value()); } -void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) { +void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) { StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value()); } @@ -1415,7 +1951,9 @@ void Mips64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) return EmitLoad(mdest, SP, src.Int32Value(), size); } -void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) { +void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> src, + size_t size) { return EmitLoad(mdest, S1, src.Int32Value(), size); } @@ -1449,18 +1987,20 @@ void Mips64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, } void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, - ThreadOffset<8> offs) { + ThreadOffset<kMipsDoublewordSize> offs) { Mips64ManagedRegister dest = mdest.AsMips64(); CHECK(dest.IsGpuRegister()); LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value()); } -void Mips64Assembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no sign extension necessary for mips"; +void Mips64Assembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No sign extension necessary for MIPS64"; } -void 
Mips64Assembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips"; +void Mips64Assembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No zero extension necessary for MIPS64"; } void Mips64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) { @@ -1492,7 +2032,7 @@ void Mips64Assembler::CopyRef(FrameOffset dest, FrameOffset src, } void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, - ThreadOffset<8> thr_offs, + ThreadOffset<kMipsDoublewordSize> thr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; @@ -1500,7 +2040,7 @@ void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value()); } -void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs, +void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); @@ -1561,9 +2101,12 @@ void Mips64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameO } } -void Mips64Assembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, + FrameOffset src_base ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, @@ -1584,15 +2127,18 @@ void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, } } -void Mips64Assembler::Copy(FrameOffset /*dest*/, 
Offset /*dest_offset*/, FrameOffset /*src*/, Offset -/*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, + Offset dest_offset ATTRIBUTE_UNUSED, + FrameOffset src ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } -void Mips64Assembler::MemoryBarrier(ManagedRegister) { +void Mips64Assembler::MemoryBarrier(ManagedRegister mreg ATTRIBUTE_UNUSED) { // TODO: sync? - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, @@ -1604,7 +2150,7 @@ void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg; CHECK(out_reg.IsGpuRegister()) << out_reg; if (null_allowed) { - Label null_arg; + Mips64Label null_arg; // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is // the address in the handle scope holding the reference. // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset) @@ -1631,7 +2177,7 @@ void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off, Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; if (null_allowed) { - Label null_arg; + Mips64Label null_arg; LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); // Null values get a handle scope entry value of 0. 
Otherwise, the handle scope entry is @@ -1653,7 +2199,7 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, Mips64ManagedRegister in_reg = min_reg.AsMips64(); CHECK(out_reg.IsGpuRegister()) << out_reg; CHECK(in_reg.IsGpuRegister()) << in_reg; - Label null_arg; + Mips64Label null_arg; if (!out_reg.Equals(in_reg)) { LoadConst32(out_reg.AsGpuRegister(), 0); } @@ -1663,11 +2209,13 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, Bind(&null_arg); } -void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { +void Mips64Assembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { // TODO: not validating references } -void Mips64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { +void Mips64Assembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { // TODO: not validating references } @@ -1679,6 +2227,7 @@ void Mips64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), base.AsGpuRegister(), offset.Int32Value()); Jalr(scratch.AsGpuRegister()); + Nop(); // TODO: place reference map on call } @@ -1691,11 +2240,13 @@ void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscr LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), scratch.AsGpuRegister(), offset.Int32Value()); Jalr(scratch.AsGpuRegister()); + Nop(); // TODO: place reference map on call } -void Mips64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*mscratch*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::GetCurrentThread(ManagedRegister tr) { @@ -1703,37 +2254,39 @@ void 
Mips64Assembler::GetCurrentThread(ManagedRegister tr) { } void Mips64Assembler::GetCurrentThread(FrameOffset offset, - ManagedRegister /*mscratch*/) { + ManagedRegister mscratch ATTRIBUTE_UNUSED) { StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value()); } void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) { Mips64ManagedRegister scratch = mscratch.AsMips64(); - Mips64ExceptionSlowPath* slow = new Mips64ExceptionSlowPath(scratch, stack_adjust); - buffer_.EnqueueSlowPath(slow); - LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), - S1, Thread::ExceptionOffset<8>().Int32Value()); - Bnezc(scratch.AsGpuRegister(), slow->Entry()); -} - -void Mips64ExceptionSlowPath::Emit(Assembler* sasm) { - Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm); -#define __ sp_asm-> - __ Bind(&entry_); - if (stack_adjust_ != 0) { // Fix up the frame. - __ DecreaseFrameSize(stack_adjust_); + exception_blocks_.emplace_back(scratch, stack_adjust); + LoadFromOffset(kLoadDoubleword, + scratch.AsGpuRegister(), + S1, + Thread::ExceptionOffset<kMipsDoublewordSize>().Int32Value()); + Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry()); +} + +void Mips64Assembler::EmitExceptionPoll(Mips64ExceptionSlowPath* exception) { + Bind(exception->Entry()); + if (exception->stack_adjust_ != 0) { // Fix up the frame. + DecreaseFrameSize(exception->stack_adjust_); } - // Pass exception object as argument - // Don't care about preserving A0 as this call won't return - __ Move(A0, scratch_.AsGpuRegister()); + // Pass exception object as argument. + // Don't care about preserving A0 as this call won't return. 
+ CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); + Move(A0, exception->scratch_.AsGpuRegister()); // Set up call to Thread::Current()->pDeliverException - __ LoadFromOffset(kLoadDoubleword, T9, S1, - QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()); - // TODO: check T9 usage - __ Jr(T9); + LoadFromOffset(kLoadDoubleword, + T9, + S1, + QUICK_ENTRYPOINT_OFFSET(kMipsDoublewordSize, pDeliverException).Int32Value()); + Jr(T9); + Nop(); + // Call never returns - __ Break(); -#undef __ + Break(); } } // namespace mips64 diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 42962bca20..57fc19a6e9 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -17,18 +17,22 @@ #ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ #define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ +#include <utility> #include <vector> #include "base/macros.h" #include "constants_mips64.h" #include "globals.h" #include "managed_register_mips64.h" -#include "utils/assembler.h" #include "offsets.h" +#include "utils/assembler.h" +#include "utils/label.h" namespace art { namespace mips64 { +static constexpr size_t kMipsDoublewordSize = 8; + enum LoadOperandType { kLoadSignedByte, kLoadUnsignedByte, @@ -60,10 +64,57 @@ enum FPClassMaskType { kPositiveZero = 0x200, }; +class Mips64Label : public Label { + public: + Mips64Label() : prev_branch_id_plus_one_(0) {} + + Mips64Label(Mips64Label&& src) + : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {} + + private: + uint32_t prev_branch_id_plus_one_; // To get distance from preceding branch, if any. + + friend class Mips64Assembler; + DISALLOW_COPY_AND_ASSIGN(Mips64Label); +}; + +// Slowpath entered when Thread::Current()->_exception is non-null. 
+class Mips64ExceptionSlowPath { + public: + explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) + : scratch_(scratch), stack_adjust_(stack_adjust) {} + + Mips64ExceptionSlowPath(Mips64ExceptionSlowPath&& src) + : scratch_(src.scratch_), + stack_adjust_(src.stack_adjust_), + exception_entry_(std::move(src.exception_entry_)) {} + + private: + Mips64Label* Entry() { return &exception_entry_; } + const Mips64ManagedRegister scratch_; + const size_t stack_adjust_; + Mips64Label exception_entry_; + + friend class Mips64Assembler; + DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath); +}; + class Mips64Assembler FINAL : public Assembler { public: - Mips64Assembler() {} - virtual ~Mips64Assembler() {} + Mips64Assembler() + : overwriting_(false), + overwrite_location_(0), + last_position_adjustment_(0), + last_old_position_(0), + last_branch_id_(0) { + cfi().DelayEmittingAdvancePCs(); + } + + virtual ~Mips64Assembler() { + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + } + } // Emit Machine Instructions. 
void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt); @@ -156,14 +207,12 @@ class Mips64Assembler FINAL : public Assembler { void Dclz(GpuRegister rd, GpuRegister rs); void Dclo(GpuRegister rd, GpuRegister rs); - void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16); - void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16); - void J(uint32_t addr26); - void Jal(uint32_t addr26); void Jalr(GpuRegister rd, GpuRegister rs); void Jalr(GpuRegister rs); void Jr(GpuRegister rs); void Auipc(GpuRegister rs, uint16_t imm16); + void Addiupc(GpuRegister rs, uint32_t imm19); + void Bc(uint32_t imm26); void Jic(GpuRegister rt, uint16_t imm16); void Jialc(GpuRegister rt, uint16_t imm16); void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16); @@ -240,32 +289,34 @@ class Mips64Assembler FINAL : public Assembler { void Clear(GpuRegister rd); void Not(GpuRegister rd, GpuRegister rs); - // Higher level composite instructions + // Higher level composite instructions. void LoadConst32(GpuRegister rd, int32_t value); void LoadConst64(GpuRegister rd, int64_t value); // MIPS64 - void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT); void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64 - void Bind(Label* label) OVERRIDE; - void Jump(Label* label) OVERRIDE { - B(label); + void Bind(Label* label) OVERRIDE { + Bind(down_cast<Mips64Label*>(label)); + } + void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64"; } - void B(Label* label); - void Jalr(Label* label, GpuRegister indirect_reg = RA); - // TODO: implement common for R6 and non-R6 interface for conditional branches? 
- void Bltc(GpuRegister rs, GpuRegister rt, Label* label); - void Bltzc(GpuRegister rt, Label* label); - void Bgtzc(GpuRegister rt, Label* label); - void Bgec(GpuRegister rs, GpuRegister rt, Label* label); - void Bgezc(GpuRegister rt, Label* label); - void Blezc(GpuRegister rt, Label* label); - void Bltuc(GpuRegister rs, GpuRegister rt, Label* label); - void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label); - void Beqc(GpuRegister rs, GpuRegister rt, Label* label); - void Bnec(GpuRegister rs, GpuRegister rt, Label* label); - void Beqzc(GpuRegister rs, Label* label); - void Bnezc(GpuRegister rs, Label* label); + + void Bind(Mips64Label* label); + void Bc(Mips64Label* label); + void Jialc(Mips64Label* label, GpuRegister indirect_reg); + void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bltzc(GpuRegister rt, Mips64Label* label); + void Bgtzc(GpuRegister rt, Mips64Label* label); + void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bgezc(GpuRegister rt, Mips64Label* label); + void Blezc(GpuRegister rt, Mips64Label* label); + void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Beqzc(GpuRegister rs, Mips64Label* label); + void Bnezc(GpuRegister rs, Mips64Label* label); void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); @@ -277,43 +328,42 @@ class Mips64Assembler FINAL : public Assembler { void Emit(uint32_t value); // - // Overridden common assembler high-level functionality + // Overridden common assembler high-level functionality. // - // Emit code that will create an activation on the stack + // Emit code that will create an activation on the stack. 
void BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; - // Emit code that will remove an activation from the stack + // Emit code that will remove an activation from the stack. void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE; void IncreaseFrameSize(size_t adjust) OVERRIDE; void DecreaseFrameSize(size_t adjust) OVERRIDE; - // Store routines + // Store routines. void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE; void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE; void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE; void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE; - void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister mscratch) OVERRIDE; - - void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, + void StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; - void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE; + void StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) OVERRIDE; void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off, ManagedRegister mscratch) OVERRIDE; - // Load routines + // Load routines. 
void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE; - void LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) OVERRIDE; + void LoadFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> src, + size_t size) OVERRIDE; void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; @@ -322,15 +372,16 @@ class Mips64Assembler FINAL : public Assembler { void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE; - void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) OVERRIDE; + void LoadRawPtrFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> offs) OVERRIDE; - // Copying routines + // Copying routines. void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE; - void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs, + void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMipsDoublewordSize> thr_offs, ManagedRegister mscratch) OVERRIDE; - void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, + void CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE; @@ -354,13 +405,13 @@ class Mips64Assembler FINAL : public Assembler { void MemoryBarrier(ManagedRegister) OVERRIDE; - // Sign extension + // Sign extension. void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; - // Zero extension + // Zero extension. void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; - // Exploit fast access in managed code to Thread::Current() + // Exploit fast access in managed code to Thread::Current(). 
void GetCurrentThread(ManagedRegister tr) OVERRIDE; void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE; @@ -376,7 +427,7 @@ class Mips64Assembler FINAL : public Assembler { void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister mscratch, bool null_allowed) OVERRIDE; - // src holds a handle scope entry (Object**) load this into dst + // src holds a handle scope entry (Object**) load this into dst. void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; // Heap::VerifyObject on src. In some cases (such as a reference to this) we @@ -384,37 +435,253 @@ class Mips64Assembler FINAL : public Assembler { void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; - // Call to address held at [base+offset] + // Call to address held at [base+offset]. void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE; void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE; - void CallFromThread64(ThreadOffset<8> offset, ManagedRegister mscratch) OVERRIDE; + void CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset, + ManagedRegister mscratch) OVERRIDE; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE; + // Emit slow paths queued during assembly and promote short branches to long if needed. + void FinalizeCode() OVERRIDE; + + // Emit branches and finalize all instructions. + void FinalizeInstructions(const MemoryRegion& region); + + // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS64, + // must be used instead of Mips64Label::GetPosition()). 
+ uint32_t GetLabelLocation(Mips64Label* label) const; + + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position); + + enum BranchCondition { + kCondLT, + kCondGE, + kCondLE, + kCondGT, + kCondLTZ, + kCondGEZ, + kCondLEZ, + kCondGTZ, + kCondEQ, + kCondNE, + kCondEQZ, + kCondNEZ, + kCondLTU, + kCondGEU, + kUncond, + }; + friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs); + private: + class Branch { + public: + enum Type { + // Short branches. + kUncondBranch, + kCondBranch, + kCall, + // Long branches. + kLongUncondBranch, + kLongCondBranch, + kLongCall, + }; + + // Bit sizes of offsets defined as enums to minimize chance of typos. + enum OffsetBits { + kOffset16 = 16, + kOffset18 = 18, + kOffset21 = 21, + kOffset23 = 23, + kOffset28 = 28, + kOffset32 = 32, + }; + + static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_ + static constexpr int32_t kMaxBranchLength = 32; + static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t); + + struct BranchInfo { + // Branch length as a number of 4-byte-long instructions. + uint32_t length; + // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's + // PC-relative offset (or its most significant 16-bit half, which goes first). + uint32_t instr_offset; + // Different MIPS instructions with PC-relative offsets apply said offsets to slightly + // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte + // instructions) from the instruction containing the offset. + uint32_t pc_org; + // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is + // an exception: use kOffset23 for beqzc/bnezc). + OffsetBits offset_size; + // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift + // count. 
+ int offset_shift; + }; + static const BranchInfo branch_info_[/* Type */]; + + // Unconditional branch. + Branch(uint32_t location, uint32_t target); + // Conditional branch. + Branch(uint32_t location, + uint32_t target, + BranchCondition condition, + GpuRegister lhs_reg, + GpuRegister rhs_reg = ZERO); + // Call (branch and link) that stores the target address in a given register (i.e. T9). + Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg); + + // Some conditional branches with lhs = rhs are effectively NOPs, while some + // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs. + // So, we need a way to identify such branches in order to emit no instructions for them + // or change them to unconditional. + static bool IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs); + static bool IsUncond(BranchCondition condition, GpuRegister lhs, GpuRegister rhs); + + static BranchCondition OppositeCondition(BranchCondition cond); + + Type GetType() const; + BranchCondition GetCondition() const; + GpuRegister GetLeftRegister() const; + GpuRegister GetRightRegister() const; + uint32_t GetTarget() const; + uint32_t GetLocation() const; + uint32_t GetOldLocation() const; + uint32_t GetLength() const; + uint32_t GetOldLength() const; + uint32_t GetSize() const; + uint32_t GetOldSize() const; + uint32_t GetEndLocation() const; + uint32_t GetOldEndLocation() const; + bool IsLong() const; + bool IsResolved() const; + + // Returns the bit size of the signed offset that the branch instruction can handle. + OffsetBits GetOffsetSize() const; + + // Calculates the distance between two byte locations in the assembler buffer and + // returns the number of bits needed to represent the distance as a signed integer. + // + // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc), + // and 26 (bc) bits, which are additionally shifted left 2 positions at run time. 
+ // + // Composite branches (made of several instructions) with longer reach have 32-bit + // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first). + // The composite branches cover the range of PC + ~+/-2GB. The range is not end-to-end, + // however. Consider the following implementation of a long unconditional branch, for + // example: + // + // auipc at, offset_31_16 // at = pc + sign_extend(offset_31_16) << 16 + // jic at, offset_15_0 // pc = at + sign_extend(offset_15_0) + // + // Both of the above instructions take 16-bit signed offsets as immediate operands. + // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000 + // due to sign extension. This must be compensated for by incrementing offset_31_16 + // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is + // 0x7FFF, adding 1 will overflow the positive offset into the negative range. + // Therefore, the long branch range is something like from PC - 0x80000000 to + // PC + 0x7FFF7FFF, IOW, shorter by 32KB on one side. + // + // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special + // case with the addiu instruction and a 16 bit offset. + static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target); + + // Resolve a branch when the target is known. + void Resolve(uint32_t target); + + // Relocate a branch by a given delta if needed due to expansion of this or another + // branch at a given location by this delta (just changes location_ and target_). + void Relocate(uint32_t expand_location, uint32_t delta); + + // If the branch is short, changes its type to long. + void PromoteToLong(); + + // If necessary, updates the type by promoting a short branch to a long branch + // based on the branch location and target. Returns the amount (in bytes) by + // which the branch size has increased. 
+ // max_short_distance caps the maximum distance between location_ and target_ + // that is allowed for short branches. This is for debugging/testing purposes. + // max_short_distance = 0 forces all short branches to become long. + // Use the implicit default argument when not debugging/testing. + uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); + + // Returns the location of the instruction(s) containing the offset. + uint32_t GetOffsetLocation() const; + + // Calculates and returns the offset ready for encoding in the branch instruction(s). + uint32_t GetOffset() const; + + private: + // Completes branch construction by determining and recording its type. + void InitializeType(bool is_call); + // Helper for the above. + void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); + + uint32_t old_location_; // Offset into assembler buffer in bytes. + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + + GpuRegister lhs_reg_; // Left-hand side register in conditional branches or + // indirect call register. + GpuRegister rhs_reg_; // Right-hand side register in conditional branches. + BranchCondition condition_; // Condition for conditional branches. + + Type type_; // Current type of the branch. + Type old_type_; // Initial type of the branch. 
+ }; + friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs); + friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs); + void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct); void EmitRsd(int opcode, GpuRegister rs, GpuRegister rd, int shamt, int funct); void EmitRtd(int opcode, GpuRegister rt, GpuRegister rd, int shamt, int funct); void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm); void EmitI21(int opcode, GpuRegister rs, uint32_t imm21); - void EmitJ(int opcode, uint32_t addr26); + void EmitI26(int opcode, uint32_t imm26); void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct); void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm); + void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21); - DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); -}; + void Buncond(Mips64Label* label); + void Bcond(Mips64Label* label, + BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs = ZERO); + void Call(Mips64Label* label, GpuRegister indirect_reg); + void FinalizeLabeledBranch(Mips64Label* label); -// Slowpath entered when Thread::Current()->_exception is non-null -class Mips64ExceptionSlowPath FINAL : public SlowPath { - public: - Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) - : scratch_(scratch), stack_adjust_(stack_adjust) {} - virtual void Emit(Assembler *sp_asm) OVERRIDE; - private: - const Mips64ManagedRegister scratch_; - const size_t stack_adjust_; + Branch* GetBranch(uint32_t branch_id); + const Branch* GetBranch(uint32_t branch_id) const; + + void PromoteBranches(); + void EmitBranch(Branch* branch); + void EmitBranches(); + void PatchCFI(); + + // Emits exception block. + void EmitExceptionPoll(Mips64ExceptionSlowPath* exception); + + // List of exception blocks to generate at the end of the code cache. 
+ std::vector<Mips64ExceptionSlowPath> exception_blocks_; + + std::vector<Branch> branches_; + + // Whether appending instructions at the end of the buffer or overwriting the existing ones. + bool overwriting_; + // The current overwrite location. + uint32_t overwrite_location_; + + // Data for AdjustedPosition(), see the description there. + uint32_t last_position_adjustment_; + uint32_t last_old_position_; + uint32_t last_branch_id_; + + DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); }; } // namespace mips64 diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 4413906fd7..29a5a88316 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -24,6 +24,8 @@ #include "base/stl_util.h" #include "utils/assembler_test.h" +#define __ GetAssembler()-> + namespace art { struct MIPS64CpuRegisterCompare { @@ -48,8 +50,26 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return "mips64"; } + std::string GetAssemblerCmdName() OVERRIDE { + // We assemble and link for MIPS64R6. See GetAssemblerParameters() for details. + return "gcc"; + } + std::string GetAssemblerParameters() OVERRIDE { - return " --no-warn -march=mips64r6"; + // We assemble and link for MIPS64R6. The reason is that object files produced for MIPS64R6 + // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative + // branches in the .text section and so they require a relocation pass (there's a relocation + // section, .rela.text, that has the needed info to fix up the branches). + return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib"; + } + + void Pad(std::vector<uint8_t>& data) OVERRIDE { + // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple + // of 16 and there doesn't appear to be a way to suppress this padding. 
Our assembler doesn't + // pad, so, in order for two assembler outputs to match, we need to match the padding as well. + // NOP is encoded as four zero bytes on MIPS. + size_t pad_size = RoundUp(data.size(), 16u) - data.size(); + data.insert(data.end(), pad_size, 0); } std::string GetDisassembleParameters() OVERRIDE { @@ -182,6 +202,71 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return secondary_register_names_[reg]; } + std::string RepeatInsn(size_t count, const std::string& insn) { + std::string result; + for (; count != 0u; --count) { + result += insn; + } + return result; + } + + void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, + mips64::Mips64Label*), + std::string instr_name) { + mips64::Mips64Label label; + (Base::GetAssembler()->*f)(mips64::A0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + (Base::GetAssembler()->*f)(mips64::A1, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a1, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + + void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, + mips64::GpuRegister, + mips64::Mips64Label*), + std::string instr_name) { + mips64::Mips64Label label; + (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ 
Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, $a1, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a2, $a3, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + private: std::vector<mips64::GpuRegister*> registers_; std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_; @@ -194,7 +279,6 @@ TEST_F(AssemblerMIPS64Test, Toolchain) { EXPECT_TRUE(CheckTools()); } - /////////////////// // FP Operations // /////////////////// @@ -348,7 +432,203 @@ TEST_F(AssemblerMIPS64Test, CvtSW) { //////////////// TEST_F(AssemblerMIPS64Test, Jalr) { - DriverStr(RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); + DriverStr(".set noreorder\n" + + RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); +} + +TEST_F(AssemblerMIPS64Test, Jialc) { + mips64::Mips64Label label1, label2; + __ Jialc(&label1, mips64::T9); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Jialc(&label2, mips64::T9); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Jialc(&label1, mips64::T9); + + std::string expected = + ".set noreorder\n" + "lapc $t9, 1f\n" + "jialc $t9, 0\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "lapc $t9, 2f\n" + "jialc $t9, 0\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "lapc $t9, 1b\n" + "jialc $t9, 0\n"; + DriverStr(expected, "Jialc"); +} + +TEST_F(AssemblerMIPS64Test, LongJialc) { + mips64::Mips64Label label1, label2; + __ Jialc(&label1, mips64::T9); 
+ constexpr uint32_t kAdduCount1 = (1u << 18) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Jialc(&label2, mips64::T9); + constexpr uint32_t kAdduCount2 = (1u << 18) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Jialc(&label1, mips64::T9); + + uint32_t offset_forward1 = 3 + kAdduCount1; // 3: account for auipc, daddiu and jialc. + offset_forward1 <<= 2; + offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in daddiu. + + uint32_t offset_forward2 = 3 + kAdduCount2; // 3: account for auipc, daddiu and jialc. + offset_forward2 <<= 2; + offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in daddiu. + + uint32_t offset_back = -(3 + kAdduCount2); // 3: account for auipc, daddiu and jialc. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in daddiu. 
+ + std::ostringstream oss; + oss << + ".set noreorder\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_forward1) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward1) << "\n" + "jialc $t9, 0\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "1:\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_forward2) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward2) << "\n" + "jialc $t9, 0\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "2:\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_back) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "jialc $t9, 0\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongJialc"); +} + +TEST_F(AssemblerMIPS64Test, Bc) { + mips64::Mips64Label label1, label2; + __ Bc(&label1); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Bc(&label2); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Bc(&label1); + + std::string expected = + ".set noreorder\n" + "bc 1f\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "bc 2f\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "bc 1b\n"; + DriverStr(expected, "Bc"); +} + +TEST_F(AssemblerMIPS64Test, Beqzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc"); +} + +TEST_F(AssemblerMIPS64Test, Bnezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc"); +} + +TEST_F(AssemblerMIPS64Test, Bltzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc"); +} + +TEST_F(AssemblerMIPS64Test, Bgezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc"); +} + +TEST_F(AssemblerMIPS64Test, Blezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc"); +} + 
+TEST_F(AssemblerMIPS64Test, Bgtzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc"); +} + +TEST_F(AssemblerMIPS64Test, Beqc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc"); +} + +TEST_F(AssemblerMIPS64Test, Bnec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec"); +} + +TEST_F(AssemblerMIPS64Test, Bltc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc"); +} + +TEST_F(AssemblerMIPS64Test, Bgec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec"); +} + +TEST_F(AssemblerMIPS64Test, Bltuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc"); +} + +TEST_F(AssemblerMIPS64Test, Bgeuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc"); +} + +TEST_F(AssemblerMIPS64Test, LongBeqc) { + mips64::Mips64Label label; + __ Beqc(mips64::A0, mips64::A1, &label); + constexpr uint32_t kAdduCount1 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr uint32_t kAdduCount2 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Beqc(mips64::A2, mips64::A3, &label); + + uint32_t offset_forward = 2 + kAdduCount1; // 2: account for auipc and jic. + offset_forward <<= 2; + offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic. + + uint32_t offset_back = -(kAdduCount2 + 1); // 1: account for bnec. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic. 
+ + std::ostringstream oss; + oss << + ".set noreorder\n" + "bnec $a0, $a1, 1f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" + "1:\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "2:\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "bnec $a2, $a3, 3f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "3:\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBeqc"); } ////////// |