summary | refs | log | tree | commit | diff
path: root/compiler/utils
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/utils')
-rw-r--r-- compiler/utils/arm/assembler_arm.cc 8
-rw-r--r-- compiler/utils/arm/assembler_arm32.h 2
-rw-r--r-- compiler/utils/arm/assembler_thumb2.cc 129
-rw-r--r-- compiler/utils/arm/assembler_thumb2.h 14
-rw-r--r-- compiler/utils/arm/assembler_thumb2_test.cc 40
-rw-r--r-- compiler/utils/assembler_test.h 5
-rw-r--r-- compiler/utils/assembler_thumb_test.cc 103
-rw-r--r-- compiler/utils/assembler_thumb_test_expected.cc.inc 142
-rw-r--r-- compiler/utils/mips/assembler_mips.cc 12
-rw-r--r-- compiler/utils/mips/assembler_mips.h 2
-rw-r--r-- compiler/utils/mips64/assembler_mips64.cc 1007
-rw-r--r-- compiler/utils/mips64/assembler_mips64.h 387
-rw-r--r-- compiler/utils/mips64/assembler_mips64_test.cc 286
13 files changed, 1763 insertions, 374 deletions
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 68e39568bb..dead8fd9a8 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -342,9 +342,9 @@ bool Address::CanHoldLoadOffsetThumb(LoadOperandType type, int offset) {
return IsAbsoluteUint<12>(offset);
case kLoadSWord:
case kLoadDWord:
- return IsAbsoluteUint<10>(offset); // VFP addressing mode.
+ return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; // VFP addressing mode.
case kLoadWordPair:
- return IsAbsoluteUint<10>(offset);
+ return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;
default:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
@@ -360,9 +360,9 @@ bool Address::CanHoldStoreOffsetThumb(StoreOperandType type, int offset) {
return IsAbsoluteUint<12>(offset);
case kStoreSWord:
case kStoreDWord:
- return IsAbsoluteUint<10>(offset); // VFP addressing mode.
+ return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; // VFP addressing mode.
case kStoreWordPair:
- return IsAbsoluteUint<10>(offset);
+ return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;
default:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 5233dcbbb0..ce3a87275d 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -389,8 +389,6 @@ class Arm32Assembler FINAL : public ArmAssembler {
void EmitBranch(Condition cond, Label* label, bool link);
static int32_t EncodeBranchOffset(int offset, int32_t inst);
static int DecodeBranchOffset(int32_t inst);
- int32_t EncodeTstOffset(int offset, int32_t inst);
- int DecodeTstOffset(int32_t inst);
bool ShifterOperandCanHoldArm32(uint32_t immediate, ShifterOperand* shifter_op);
};
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 297cc54e29..7ad5b440e0 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -1349,7 +1349,8 @@ void Thumb2Assembler::Emit32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED,
int32_t encoding = 0;
if (so.IsImmediate()) {
// Check special cases.
- if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12))) {
+ if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12)) &&
+ /* Prefer T3 encoding to T4. */ !ShifterOperandCanAlwaysHold(so.GetImmediate())) {
if (set_cc != kCcSet) {
if (opcode == SUB) {
thumb_opcode = 5U;
@@ -3220,7 +3221,7 @@ void Thumb2Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond, SetCc set_cc) {
CheckCondition(cond);
- EmitShift(rd, rm, RRX, rm, cond, set_cc);
+ EmitShift(rd, rm, RRX, 0, cond, set_cc);
}
@@ -3469,6 +3470,73 @@ void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond)
}
}
+// Returns a mask of the offset bits that a load of the given type can encode
+// directly in the instruction (see the per-case comments for the encoding).
+// Aborts via LOG(FATAL) for any operand type not listed.
+int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
+  switch (type) {
+    case kLoadSignedByte:
+    case kLoadSignedHalfword:
+    case kLoadUnsignedHalfword:
+    case kLoadUnsignedByte:
+    case kLoadWord:
+      // We can encode imm12 offset.
+      // Plain signed literal, matching the int32_t return type and the store variant.
+      return 0xfff;
+    case kLoadSWord:
+    case kLoadDWord:
+    case kLoadWordPair:
+      // We can encode imm8:'00' offset.
+      return 0xff << 2;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
+// Returns a mask of the offset bits that a store of the given type can carry in
+// the instruction itself. Aborts via LOG(FATAL) for any operand type not listed.
+int32_t Thumb2Assembler::GetAllowedStoreOffsetBits(StoreOperandType type) {
+  // imm12 offset.
+  const int32_t imm12_mask = 0xfff;
+  // imm8:'00' offset.
+  const int32_t imm8_scaled_mask = 0xff << 2;
+  switch (type) {
+    case kStoreByte:
+    case kStoreHalfword:
+    case kStoreWord:
+      return imm12_mask;
+    case kStoreSWord:
+    case kStoreDWord:
+    case kStoreWordPair:
+      return imm8_scaled_mask;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
+// Tries to split `offset` into a part to be added to the base register and a
+// remainder that lies entirely within `allowed_offset_bits`. On success both parts
+// are stored through the out parameters and true is returned; on failure the out
+// parameters are left untouched and false is returned.
+bool Thumb2Assembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
+                                              int32_t offset,
+                                              /*out*/ int32_t* add_to_base,
+                                              /*out*/ int32_t* offset_for_load_store) {
+  const int32_t high_part = offset & ~allowed_offset_bits;
+  // The split is only usable if the high part, or its negation, is accepted by
+  // ShifterOperandCanAlwaysHold().
+  const bool encodable =
+      ShifterOperandCanAlwaysHold(high_part) || ShifterOperandCanAlwaysHold(-high_part);
+  if (!encodable) {
+    return false;
+  }
+  *add_to_base = high_part;
+  *offset_for_load_store = offset & allowed_offset_bits;
+  return true;
+}
+
+// Emits code that leaves an adjusted base address in `temp` and returns the offset
+// to use together with `temp`: the part of `offset` within `allowed_offset_bits`
+// when the offset can be split, or 0 when the whole offset was folded into `temp`.
+int32_t Thumb2Assembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits,
+                                               Register temp,
+                                               Register base,
+                                               int32_t offset,
+                                               Condition cond) {
+  // The offset is expected to have bits outside the allowed mask.
+  DCHECK_NE(offset & ~allowed_offset_bits, 0);
+  int32_t base_adjustment;
+  int32_t remaining_offset;
+  const bool split_ok = CanSplitLoadStoreOffset(
+      allowed_offset_bits, offset, &base_adjustment, &remaining_offset);
+  if (!split_ok) {
+    // Load the whole offset into temp and add the base to it.
+    LoadImmediate(temp, offset, cond);
+    add(temp, temp, ShifterOperand(base), cond, kCcKeep);
+    return 0;
+  }
+  // temp = base + high part; the low part stays with the load/store.
+  AddConstant(temp, base, base_adjustment, cond, kCcKeep);
+  return remaining_offset;
+}
// Implementation note: this method must emit at most one instruction when
// Address::CanHoldLoadOffsetThumb.
@@ -3479,12 +3547,26 @@ void Thumb2Assembler::LoadFromOffset(LoadOperandType type,
Condition cond) {
if (!Address::CanHoldLoadOffsetThumb(type, offset)) {
CHECK_NE(base, IP);
- LoadImmediate(IP, offset, cond);
- add(IP, IP, ShifterOperand(base), cond);
- base = IP;
- offset = 0;
+ // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks.
+ int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type);
+ DCHECK_NE(offset & ~allowed_offset_bits, 0);
+ int32_t add_to_base, offset_for_load;
+ if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
+ // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load.
+ AddConstant(reg, base, add_to_base, cond, kCcKeep);
+ base = reg;
+ offset = offset_for_load;
+ } else {
+ Register temp = (reg == base) ? IP : reg;
+ LoadImmediate(temp, offset, cond);
+ // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD.
+ // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load.
+ add(reg, reg, ShifterOperand((reg == base) ? IP : base), cond, kCcKeep);
+ base = reg;
+ offset = 0;
+ }
}
- CHECK(Address::CanHoldLoadOffsetThumb(type, offset));
+ DCHECK(Address::CanHoldLoadOffsetThumb(type, offset));
switch (type) {
case kLoadSignedByte:
ldrsb(reg, Address(base, offset), cond);
@@ -3510,7 +3592,6 @@ void Thumb2Assembler::LoadFromOffset(LoadOperandType type,
}
}
-
// Implementation note: this method must emit at most one instruction when
// Address::CanHoldLoadOffsetThumb, as expected by JIT::GuardedLoadFromOffset.
void Thumb2Assembler::LoadSFromOffset(SRegister reg,
@@ -3519,12 +3600,10 @@ void Thumb2Assembler::LoadSFromOffset(SRegister reg,
Condition cond) {
if (!Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)) {
CHECK_NE(base, IP);
- LoadImmediate(IP, offset, cond);
- add(IP, IP, ShifterOperand(base), cond);
+ offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadSWord), IP, base, offset, cond);
base = IP;
- offset = 0;
}
- CHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset));
+ DCHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset));
vldrs(reg, Address(base, offset), cond);
}
@@ -3537,12 +3616,10 @@ void Thumb2Assembler::LoadDFromOffset(DRegister reg,
Condition cond) {
if (!Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)) {
CHECK_NE(base, IP);
- LoadImmediate(IP, offset, cond);
- add(IP, IP, ShifterOperand(base), cond);
+ offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadDWord), IP, base, offset, cond);
base = IP;
- offset = 0;
}
- CHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset));
+ DCHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset));
vldrd(reg, Address(base, offset), cond);
}
@@ -3573,12 +3650,12 @@ void Thumb2Assembler::StoreToOffset(StoreOperandType type,
offset += kRegisterSize;
}
}
- LoadImmediate(tmp_reg, offset, cond);
- add(tmp_reg, tmp_reg, ShifterOperand(base), AL);
+ // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset()
+ // and in the "unsplittable" path get rid of the "add" by using the store indexed instead.
+ offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset, cond);
base = tmp_reg;
- offset = 0;
}
- CHECK(Address::CanHoldStoreOffsetThumb(type, offset));
+ DCHECK(Address::CanHoldStoreOffsetThumb(type, offset));
switch (type) {
case kStoreByte:
strb(reg, Address(base, offset), cond);
@@ -3611,12 +3688,10 @@ void Thumb2Assembler::StoreSToOffset(SRegister reg,
Condition cond) {
if (!Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)) {
CHECK_NE(base, IP);
- LoadImmediate(IP, offset, cond);
- add(IP, IP, ShifterOperand(base), cond);
+ offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreSWord), IP, base, offset, cond);
base = IP;
- offset = 0;
}
- CHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset));
+ DCHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset));
vstrs(reg, Address(base, offset), cond);
}
@@ -3629,12 +3704,10 @@ void Thumb2Assembler::StoreDToOffset(DRegister reg,
Condition cond) {
if (!Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)) {
CHECK_NE(base, IP);
- LoadImmediate(IP, offset, cond);
- add(IP, IP, ShifterOperand(base), cond);
+ offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreDWord), IP, base, offset, cond);
base = IP;
- offset = 0;
}
- CHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset));
+ DCHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset));
vstrd(reg, Address(base, offset), cond);
}
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index e18361300a..9aeece8e57 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -729,13 +729,23 @@ class Thumb2Assembler FINAL : public ArmAssembler {
void EmitBranch(Condition cond, Label* label, bool link, bool x);
static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
static int DecodeBranchOffset(int32_t inst);
- int32_t EncodeTstOffset(int offset, int32_t inst);
- int DecodeTstOffset(int32_t inst);
void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount,
Condition cond = AL, SetCc set_cc = kCcDontCare);
void EmitShift(Register rd, Register rn, Shift shift, Register rm,
Condition cond = AL, SetCc set_cc = kCcDontCare);
+ static int32_t GetAllowedLoadOffsetBits(LoadOperandType type);
+ static int32_t GetAllowedStoreOffsetBits(StoreOperandType type);
+ bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
+ int32_t offset,
+ /*out*/ int32_t* add_to_base,
+ /*out*/ int32_t* offset_for_load_store);
+ int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits,
+ Register temp,
+ Register base,
+ int32_t offset,
+ Condition cond);
+
// Whether the assembler can relocate branches. If false, unresolved branches will be
// emitted on 32bits.
bool can_relocate_branches_;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index cb4b20b5ba..7b32b0fd26 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -243,7 +243,7 @@ TEST_F(AssemblerThumb2Test, sub) {
const char* expected =
"subs r1, r0, #42\n"
- "subw r1, r0, #42\n"
+ "sub.w r1, r0, #42\n"
"subs r1, r0, r2, asr #31\n"
"sub r1, r0, r2, asr #31\n";
DriverStr(expected, "sub");
@@ -257,7 +257,7 @@ TEST_F(AssemblerThumb2Test, add) {
const char* expected =
"adds r1, r0, #42\n"
- "addw r1, r0, #42\n"
+ "add.w r1, r0, #42\n"
"adds r1, r0, r2, asr #31\n"
"add r1, r0, r2, asr #31\n";
DriverStr(expected, "add");
@@ -305,21 +305,18 @@ TEST_F(AssemblerThumb2Test, StoreWordToNonThumbOffset) {
__ StoreToOffset(type, arm::IP, arm::R5, offset);
const char* expected =
- "mov ip, #4096\n" // LoadImmediate(ip, 4096)
- "add ip, ip, sp\n"
+ "add.w ip, sp, #4096\n" // AddConstant(ip, sp, 4096)
"str r0, [ip, #0]\n"
- "str r5, [sp, #-4]!\n" // Push(r5)
- "movw r5, #4100\n" // LoadImmediate(r5, 4096 + kRegisterSize)
- "add r5, r5, sp\n"
- "str ip, [r5, #0]\n"
- "ldr r5, [sp], #4\n" // Pop(r5)
-
- "str r6, [sp, #-4]!\n" // Push(r6)
- "mov r6, #4096\n" // LoadImmediate(r6, 4096)
- "add r6, r6, r5\n"
- "str ip, [r6, #0]\n"
- "ldr r6, [sp], #4\n"; // Pop(r6)
+ "str r5, [sp, #-4]!\n" // Push(r5)
+      "add.w r5, sp, #4096\n"       // AddConstant(r5, sp, 4100 & ~0xfff)
+ "str ip, [r5, #4]\n" // StoreToOffset(type, ip, r5, 4100 & 0xfff)
+ "ldr r5, [sp], #4\n" // Pop(r5)
+
+ "str r6, [sp, #-4]!\n" // Push(r6)
+ "add.w r6, r5, #4096\n" // AddConstant(r6, r5, 4096 & ~0xfff)
+ "str ip, [r6, #0]\n" // StoreToOffset(type, ip, r6, 4096 & 0xfff)
+ "ldr r6, [sp], #4\n"; // Pop(r6)
DriverStr(expected, "StoreWordToNonThumbOffset");
}
@@ -360,20 +357,17 @@ TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) {
__ StoreToOffset(type, arm::R11, arm::R5, offset);
const char* expected =
- "mov ip, #1024\n" // LoadImmediate(ip, 1024)
- "add ip, ip, sp\n"
+ "add.w ip, sp, #1024\n" // AddConstant(ip, sp, 1024)
"strd r0, r1, [ip, #0]\n"
"str r5, [sp, #-4]!\n" // Push(r5)
- "movw r5, #1028\n" // LoadImmediate(r5, 1024 + kRegisterSize)
- "add r5, r5, sp\n"
- "strd r11, ip, [r5, #0]\n"
+ "add.w r5, sp, #1024\n" // AddConstant(r5, sp, (1024 + kRegisterSize) & ~0x3fc)
+      "strd r11, ip, [r5, #4]\n"         // StoreToOffset(type, r11, r5, (1024 + kRegisterSize) & 0x3fc)
"ldr r5, [sp], #4\n" // Pop(r5)
"str r6, [sp, #-4]!\n" // Push(r6)
- "mov r6, #1024\n" // LoadImmediate(r6, 1024)
- "add r6, r6, r5\n"
- "strd r11, ip, [r6, #0]\n"
+ "add.w r6, r5, #1024\n" // AddConstant(r6, r5, 1024 & ~0x3fc)
+ "strd r11, ip, [r6, #0]\n" // StoreToOffset(type, r11, r6, 1024 & 0x3fc)
"ldr r6, [sp], #4\n"; // Pop(r6)
DriverStr(expected, "StoreWordPairToNonThumbOffset");
}
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index f1233ca457..9457da1c36 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -840,12 +840,17 @@ class AssemblerTest : public testing::Test {
return str;
}
+ // Override this to pad the code with NOPs to a certain size if needed.
+ virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
+ }
+
void DriverWrapper(std::string assembly_text, std::string test_name) {
assembler_->FinalizeCode();
size_t cs = assembler_->CodeSize();
std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
MemoryRegion code(&(*data)[0], data->size());
assembler_->FinalizeInstructions(code);
+ Pad(*data);
test_helper_->Driver(*data, assembly_text, test_name);
}
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 2ae88413e7..1de51a2dc8 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -466,6 +466,38 @@ TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) {
EmitAndCheck(&assembler, "DataProcessingShiftedRegister");
}
+TEST(Thumb2AssemblerTest, ShiftImmediate) {
+  // Note: This test produces the same results as DataProcessingShiftedRegister
+  // but it does so through the shift functions (Lsl, Lsr, ...) instead of mov().
+  arm::Thumb2Assembler assembler;
+
+  // 16-bit variants (expected output: lsls/lsrs/asrs).
+  __ Lsl(R3, R4, 4);
+  __ Lsr(R3, R4, 5);
+  __ Asr(R3, R4, 6);
+
+  // 32-bit ROR because ROR immediate doesn't have the same 16-bit version as other shifts.
+  __ Ror(R3, R4, 7);
+
+  // 32-bit RRX because RRX has no 16-bit version.
+  __ Rrx(R3, R4);
+
+  // 32-bit variants (kCcKeep: condition codes must not be set; expected output: mov.w).
+  __ Lsl(R3, R4, 4, AL, kCcKeep);
+  __ Lsr(R3, R4, 5, AL, kCcKeep);
+  __ Asr(R3, R4, 6, AL, kCcKeep);
+  __ Ror(R3, R4, 7, AL, kCcKeep);
+  __ Rrx(R3, R4, AL, kCcKeep);
+
+  // 32-bit variants (flag-setting forms with high register R8 as destination;
+  // expected output: movs.w).
+  __ Lsls(R8, R4, 4);
+  __ Lsrs(R8, R4, 5);
+  __ Asrs(R8, R4, 6);
+  __ Rors(R8, R4, 7);
+  __ Rrxs(R8, R4);
+
+  EmitAndCheck(&assembler, "ShiftImmediate");
+}
TEST(Thumb2AssemblerTest, BasicLoad) {
arm::Thumb2Assembler assembler;
@@ -823,29 +855,80 @@ TEST(Thumb2AssemblerTest, SpecialAddSub) {
__ add(R2, SP, ShifterOperand(0xf00)); // 32 bit due to imm size.
__ add(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size.
+ __ add(SP, SP, ShifterOperand(0xffc)); // 32 bit due to imm size; encoding T4.
- __ sub(SP, SP, ShifterOperand(0x50)); // 16 bit
- __ sub(R0, SP, ShifterOperand(0x50)); // 32 bit
- __ sub(R8, SP, ShifterOperand(0x50)); // 32 bit.
+ __ sub(SP, SP, ShifterOperand(0x50)); // 16 bit
+ __ sub(R0, SP, ShifterOperand(0x50)); // 32 bit
+ __ sub(R8, SP, ShifterOperand(0x50)); // 32 bit.
- __ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size
+ __ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size
+ __ sub(SP, SP, ShifterOperand(0xffc)); // 32 bit due to imm size; encoding T4.
EmitAndCheck(&assembler, "SpecialAddSub");
}
+TEST(Thumb2AssemblerTest, LoadFromOffset) {
+  arm::Thumb2Assembler assembler;
+
+  __ LoadFromOffset(kLoadWord, R2, R4, 12);  // Single 16-bit ldr.
+  __ LoadFromOffset(kLoadWord, R2, R4, 0xfff);  // Largest imm12 offset: single ldr.w.
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x1000);  // Split: add.w #0x1000, then ldr #0.
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x1000a4);  // Split: add.w #0x100000, then ldr.w #0xa4.
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x101000);  // No split: movw/movt + add, then ldr #0.
+  __ LoadFromOffset(kLoadWord, R4, R4, 0x101000);  // reg == base: offset is built in ip.
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 12);  // Same offset cases as above, for ldrh.
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0xfff);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000a4);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x101000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R4, R4, 0x101000);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 12);  // ldrd: imm8:'00' offsets.
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x3fc);  // Largest imm8:'00' offset: single ldrd.
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400);  // Split: add.w #0x400, then ldrd #0.
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400a4);  // Split: add.w #0x40000, then ldrd #0xa4.
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x40400);  // No split: movw/movt + add, then ldrd.
+  __ LoadFromOffset(kLoadWordPair, R4, R4, 0x40400);  // reg == base: offset is built in ip.
+
+  __ LoadFromOffset(kLoadWord, R0, R12, 12);  // 32-bit because of R12.
+  __ LoadFromOffset(kLoadWord, R2, R4, 0xa4 - 0x100000);  // Split: sub.w #0x100000, ldr.w #0xa4.
+
+  __ LoadFromOffset(kLoadSignedByte, R2, R4, 12);  // ldrsb.w.
+  __ LoadFromOffset(kLoadUnsignedByte, R2, R4, 12);  // 16-bit ldrb.
+  __ LoadFromOffset(kLoadSignedHalfword, R2, R4, 12);  // ldrsh.w.
+
+  EmitAndCheck(&assembler, "LoadFromOffset");
+}
+
TEST(Thumb2AssemblerTest, StoreToOffset) {
arm::Thumb2Assembler assembler;
- __ StoreToOffset(kStoreWord, R2, R4, 12); // Simple
- __ StoreToOffset(kStoreWord, R2, R4, 0x2000); // Offset too big.
- __ StoreToOffset(kStoreWord, R0, R12, 12);
- __ StoreToOffset(kStoreHalfword, R0, R12, 12);
- __ StoreToOffset(kStoreByte, R2, R12, 12);
+ __ StoreToOffset(kStoreWord, R2, R4, 12);
+ __ StoreToOffset(kStoreWord, R2, R4, 0xfff);
+ __ StoreToOffset(kStoreWord, R2, R4, 0x1000);
+ __ StoreToOffset(kStoreWord, R2, R4, 0x1000a4);
+ __ StoreToOffset(kStoreWord, R2, R4, 0x101000);
+ __ StoreToOffset(kStoreWord, R4, R4, 0x101000);
+ __ StoreToOffset(kStoreHalfword, R2, R4, 12);
+ __ StoreToOffset(kStoreHalfword, R2, R4, 0xfff);
+ __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000);
+ __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000a4);
+ __ StoreToOffset(kStoreHalfword, R2, R4, 0x101000);
+ __ StoreToOffset(kStoreHalfword, R4, R4, 0x101000);
+ __ StoreToOffset(kStoreWordPair, R2, R4, 12);
+ __ StoreToOffset(kStoreWordPair, R2, R4, 0x3fc);
+ __ StoreToOffset(kStoreWordPair, R2, R4, 0x400);
+ __ StoreToOffset(kStoreWordPair, R2, R4, 0x400a4);
+ __ StoreToOffset(kStoreWordPair, R2, R4, 0x40400);
+ __ StoreToOffset(kStoreWordPair, R4, R4, 0x40400);
+
+ __ StoreToOffset(kStoreWord, R0, R12, 12); // 32-bit because of R12.
+ __ StoreToOffset(kStoreWord, R2, R4, 0xa4 - 0x100000);
+
+ __ StoreToOffset(kStoreByte, R2, R4, 12);
EmitAndCheck(&assembler, "StoreToOffset");
}
-
TEST(Thumb2AssemblerTest, IfThen) {
arm::Thumb2Assembler assembler;
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index b79c2e46f0..9246c827a7 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -132,8 +132,8 @@ const char* DataProcessingRegisterResults[] = {
const char* DataProcessingImmediateResults[] = {
" 0: 2055 movs r0, #85 ; 0x55\n",
" 2: f06f 0055 mvn.w r0, #85 ; 0x55\n",
- " 6: f201 0055 addw r0, r1, #85 ; 0x55\n",
- " a: f2a1 0055 subw r0, r1, #85 ; 0x55\n",
+ " 6: f101 0055 add.w r0, r1, #85 ; 0x55\n",
+ " a: f1a1 0055 sub.w r0, r1, #85 ; 0x55\n",
" e: f001 0055 and.w r0, r1, #85 ; 0x55\n",
" 12: f041 0055 orr.w r0, r1, #85 ; 0x55\n",
" 16: f061 0055 orn r0, r1, #85 ; 0x55\n",
@@ -201,6 +201,24 @@ const char* DataProcessingShiftedRegisterResults[] = {
" 32: ea5f 0834 movs.w r8, r4, rrx\n",
nullptr
};
+const char* ShiftImmediateResults[] = {
+ " 0: 0123 lsls r3, r4, #4\n",
+ " 2: 0963 lsrs r3, r4, #5\n",
+ " 4: 11a3 asrs r3, r4, #6\n",
+ " 6: ea4f 13f4 mov.w r3, r4, ror #7\n",
+ " a: ea4f 0334 mov.w r3, r4, rrx\n",
+ " e: ea4f 1304 mov.w r3, r4, lsl #4\n",
+ " 12: ea4f 1354 mov.w r3, r4, lsr #5\n",
+ " 16: ea4f 13a4 mov.w r3, r4, asr #6\n",
+ " 1a: ea4f 13f4 mov.w r3, r4, ror #7\n",
+ " 1e: ea4f 0334 mov.w r3, r4, rrx\n",
+ " 22: ea5f 1804 movs.w r8, r4, lsl #4\n",
+ " 26: ea5f 1854 movs.w r8, r4, lsr #5\n",
+ " 2a: ea5f 18a4 movs.w r8, r4, asr #6\n",
+ " 2e: ea5f 18f4 movs.w r8, r4, ror #7\n",
+ " 32: ea5f 0834 movs.w r8, r4, rrx\n",
+ nullptr
+};
const char* BasicLoadResults[] = {
" 0: 69a3 ldr r3, [r4, #24]\n",
" 2: 7e23 ldrb r3, [r4, #24]\n",
@@ -434,23 +452,115 @@ const char* MovWMovTResults[] = {
const char* SpecialAddSubResults[] = {
" 0: aa14 add r2, sp, #80 ; 0x50\n",
" 2: b014 add sp, #80 ; 0x50\n",
- " 4: f20d 0850 addw r8, sp, #80 ; 0x50\n",
- " 8: f60d 7200 addw r2, sp, #3840 ; 0xf00\n",
- " c: f60d 7d00 addw sp, sp, #3840 ; 0xf00\n",
- " 10: b094 sub sp, #80 ; 0x50\n",
- " 12: f2ad 0050 subw r0, sp, #80 ; 0x50\n",
- " 16: f2ad 0850 subw r8, sp, #80 ; 0x50\n",
- " 1a: f6ad 7d00 subw sp, sp, #3840 ; 0xf00\n",
+ " 4: f10d 0850 add.w r8, sp, #80 ; 0x50\n",
+ " 8: f50d 6270 add.w r2, sp, #3840 ; 0xf00\n",
+ " c: f50d 6d70 add.w sp, sp, #3840 ; 0xf00\n",
+ " 10: f60d 7dfc addw sp, sp, #4092 ; 0xffc\n",
+ " 14: b094 sub sp, #80 ; 0x50\n",
+ " 16: f1ad 0050 sub.w r0, sp, #80 ; 0x50\n",
+ " 1a: f1ad 0850 sub.w r8, sp, #80 ; 0x50\n",
+ " 1e: f5ad 6d70 sub.w sp, sp, #3840 ; 0xf00\n",
+ " 22: f6ad 7dfc subw sp, sp, #4092 ; 0xffc\n",
+ nullptr
+};
+const char* LoadFromOffsetResults[] = {
+ " 0: 68e2 ldr r2, [r4, #12]\n",
+ " 2: f8d4 2fff ldr.w r2, [r4, #4095] ; 0xfff\n",
+ " 6: f504 5280 add.w r2, r4, #4096 ; 0x1000\n",
+ " a: 6812 ldr r2, [r2, #0]\n",
+ " c: f504 1280 add.w r2, r4, #1048576 ; 0x100000\n",
+ " 10: f8d2 20a4 ldr.w r2, [r2, #164] ; 0xa4\n",
+ " 14: f241 0200 movw r2, #4096 ; 0x1000\n",
+ " 18: f2c0 0210 movt r2, #16\n",
+ " 1c: 4422 add r2, r4\n",
+ " 1e: 6812 ldr r2, [r2, #0]\n",
+ " 20: f241 0c00 movw ip, #4096 ; 0x1000\n",
+ " 24: f2c0 0c10 movt ip, #16\n",
+ " 28: 4464 add r4, ip\n",
+ " 2a: 6824 ldr r4, [r4, #0]\n",
+ " 2c: 89a2 ldrh r2, [r4, #12]\n",
+ " 2e: f8b4 2fff ldrh.w r2, [r4, #4095] ; 0xfff\n",
+ " 32: f504 5280 add.w r2, r4, #4096 ; 0x1000\n",
+ " 36: 8812 ldrh r2, [r2, #0]\n",
+ " 38: f504 1280 add.w r2, r4, #1048576 ; 0x100000\n",
+ " 3c: f8b2 20a4 ldrh.w r2, [r2, #164] ; 0xa4\n",
+ " 40: f241 0200 movw r2, #4096 ; 0x1000\n",
+ " 44: f2c0 0210 movt r2, #16\n",
+ " 48: 4422 add r2, r4\n",
+ " 4a: 8812 ldrh r2, [r2, #0]\n",
+ " 4c: f241 0c00 movw ip, #4096 ; 0x1000\n",
+ " 50: f2c0 0c10 movt ip, #16\n",
+ " 54: 4464 add r4, ip\n",
+ " 56: 8824 ldrh r4, [r4, #0]\n",
+ " 58: e9d4 2303 ldrd r2, r3, [r4, #12]\n",
+ " 5c: e9d4 23ff ldrd r2, r3, [r4, #1020] ; 0x3fc\n",
+ " 60: f504 6280 add.w r2, r4, #1024 ; 0x400\n",
+ " 64: e9d2 2300 ldrd r2, r3, [r2]\n",
+ " 68: f504 2280 add.w r2, r4, #262144 ; 0x40000\n",
+ " 6c: e9d2 2329 ldrd r2, r3, [r2, #164]; 0xa4\n",
+ " 70: f240 4200 movw r2, #1024 ; 0x400\n",
+ " 74: f2c0 0204 movt r2, #4\n",
+ " 78: 4422 add r2, r4\n",
+ " 7a: e9d2 2300 ldrd r2, r3, [r2]\n",
+ " 7e: f240 4c00 movw ip, #1024 ; 0x400\n",
+ " 82: f2c0 0c04 movt ip, #4\n",
+ " 86: 4464 add r4, ip\n",
+ " 88: e9d4 4500 ldrd r4, r5, [r4]\n",
+ " 8c: f8dc 000c ldr.w r0, [ip, #12]\n",
+ " 90: f5a4 1280 sub.w r2, r4, #1048576 ; 0x100000\n",
+ " 94: f8d2 20a4 ldr.w r2, [r2, #164] ; 0xa4\n",
+ " 98: f994 200c ldrsb.w r2, [r4, #12]\n",
+ " 9c: 7b22 ldrb r2, [r4, #12]\n",
+ " 9e: f9b4 200c ldrsh.w r2, [r4, #12]\n",
nullptr
};
const char* StoreToOffsetResults[] = {
" 0: 60e2 str r2, [r4, #12]\n",
- " 2: f44f 5c00 mov.w ip, #8192 ; 0x2000\n",
- " 6: 44a4 add ip, r4\n",
- " 8: f8cc 2000 str.w r2, [ip]\n",
- " c: f8cc 000c str.w r0, [ip, #12]\n",
- " 10: f8ac 000c strh.w r0, [ip, #12]\n",
- " 14: f88c 200c strb.w r2, [ip, #12]\n",
+ " 2: f8c4 2fff str.w r2, [r4, #4095] ; 0xfff\n",
+ " 6: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n",
+ " a: f8cc 2000 str.w r2, [ip]\n",
+ " e: f504 1c80 add.w ip, r4, #1048576 ; 0x100000\n",
+ " 12: f8cc 20a4 str.w r2, [ip, #164] ; 0xa4\n",
+ " 16: f241 0c00 movw ip, #4096 ; 0x1000\n",
+ " 1a: f2c0 0c10 movt ip, #16\n",
+ " 1e: 44a4 add ip, r4\n",
+ " 20: f8cc 2000 str.w r2, [ip]\n",
+ " 24: f241 0c00 movw ip, #4096 ; 0x1000\n",
+ " 28: f2c0 0c10 movt ip, #16\n",
+ " 2c: 44a4 add ip, r4\n",
+ " 2e: f8cc 4000 str.w r4, [ip]\n",
+ " 32: 81a2 strh r2, [r4, #12]\n",
+ " 34: f8a4 2fff strh.w r2, [r4, #4095] ; 0xfff\n",
+ " 38: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n",
+ " 3c: f8ac 2000 strh.w r2, [ip]\n",
+ " 40: f504 1c80 add.w ip, r4, #1048576 ; 0x100000\n",
+ " 44: f8ac 20a4 strh.w r2, [ip, #164] ; 0xa4\n",
+ " 48: f241 0c00 movw ip, #4096 ; 0x1000\n",
+ " 4c: f2c0 0c10 movt ip, #16\n",
+ " 50: 44a4 add ip, r4\n",
+ " 52: f8ac 2000 strh.w r2, [ip]\n",
+ " 56: f241 0c00 movw ip, #4096 ; 0x1000\n",
+ " 5a: f2c0 0c10 movt ip, #16\n",
+ " 5e: 44a4 add ip, r4\n",
+ " 60: f8ac 4000 strh.w r4, [ip]\n",
+ " 64: e9c4 2303 strd r2, r3, [r4, #12]\n",
+ " 68: e9c4 23ff strd r2, r3, [r4, #1020] ; 0x3fc\n",
+ " 6c: f504 6c80 add.w ip, r4, #1024 ; 0x400\n",
+ " 70: e9cc 2300 strd r2, r3, [ip]\n",
+ " 74: f504 2c80 add.w ip, r4, #262144 ; 0x40000\n",
+ " 78: e9cc 2329 strd r2, r3, [ip, #164]; 0xa4\n",
+ " 7c: f240 4c00 movw ip, #1024 ; 0x400\n",
+ " 80: f2c0 0c04 movt ip, #4\n",
+ " 84: 44a4 add ip, r4\n",
+ " 86: e9cc 2300 strd r2, r3, [ip]\n",
+ " 8a: f240 4c00 movw ip, #1024 ; 0x400\n",
+ " 8e: f2c0 0c04 movt ip, #4\n",
+ " 92: 44a4 add ip, r4\n",
+ " 94: e9cc 4500 strd r4, r5, [ip]\n",
+ " 98: f8cc 000c str.w r0, [ip, #12]\n",
+ " 9c: f5a4 1c80 sub.w ip, r4, #1048576 ; 0x100000\n",
+ " a0: f8cc 20a4 str.w r2, [ip, #164] ; 0xa4\n",
+ " a4: 7322 strb r2, [r4, #12]\n",
nullptr
};
const char* IfThenResults[] = {
@@ -4952,6 +5062,7 @@ void setup_results() {
test_results["DataProcessingModifiedImmediate"] = DataProcessingModifiedImmediateResults;
test_results["DataProcessingModifiedImmediates"] = DataProcessingModifiedImmediatesResults;
test_results["DataProcessingShiftedRegister"] = DataProcessingShiftedRegisterResults;
+ test_results["ShiftImmediate"] = ShiftImmediateResults;
test_results["BasicLoad"] = BasicLoadResults;
test_results["BasicStore"] = BasicStoreResults;
test_results["ComplexLoad"] = ComplexLoadResults;
@@ -4966,6 +5077,7 @@ void setup_results() {
test_results["StoreMultiple"] = StoreMultipleResults;
test_results["MovWMovT"] = MovWMovTResults;
test_results["SpecialAddSub"] = SpecialAddSubResults;
+ test_results["LoadFromOffset"] = LoadFromOffsetResults;
test_results["StoreToOffset"] = StoreToOffsetResults;
test_results["IfThen"] = IfThenResults;
test_results["CbzCbnz"] = CbzCbnzResults;
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index aee64120a8..fc7ac7061a 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -310,15 +310,27 @@ void MipsAssembler::Seh(Register rd, Register rt) {
EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20);
}
+// Emits WSBH (R2+). Like Seb()/Seh(), it passes 0 as the second EmitR argument.
+void MipsAssembler::Wsbh(Register rd, Register rt) {
+  const Register unused_field = static_cast<Register>(0);
+  EmitR(0x1f, unused_field, rt, rd, 2, 0x20);
+}
+
void MipsAssembler::Sll(Register rd, Register rt, int shamt) {
+ CHECK(IsUint<5>(shamt)) << shamt;
EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00);
}
void MipsAssembler::Srl(Register rd, Register rt, int shamt) {
+ CHECK(IsUint<5>(shamt)) << shamt;
EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02);
}
+// Emits ROTR (R2+). Differs from Srl() only in passing 1 instead of 0 as the
+// second EmitR argument. `shamt` must fit in 5 bits.
+void MipsAssembler::Rotr(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
+  const Register rotate_selector = static_cast<Register>(1);
+  EmitR(0, rotate_selector, rt, rd, shamt, 0x02);
+}
+
void MipsAssembler::Sra(Register rd, Register rt, int shamt) {
+ CHECK(IsUint<5>(shamt)) << shamt;
EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03);
}
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 4038c1f1c4..1ef0992dac 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -135,9 +135,11 @@ class MipsAssembler FINAL : public Assembler {
void Seb(Register rd, Register rt); // R2+
void Seh(Register rd, Register rt); // R2+
+ void Wsbh(Register rd, Register rt); // R2+
void Sll(Register rd, Register rt, int shamt);
void Srl(Register rd, Register rt, int shamt);
+ void Rotr(Register rd, Register rt, int shamt); // R2+
void Sra(Register rd, Register rt, int shamt);
void Sllv(Register rd, Register rt, Register rs);
void Srlv(Register rd, Register rt, Register rs);
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index ba2525e555..107d5bb572 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -19,15 +19,73 @@
#include "base/bit_utils.h"
#include "base/casts.h"
#include "entrypoints/quick/quick_entrypoints.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "memory_region.h"
#include "thread.h"
namespace art {
namespace mips64 {
+// Emits the exception poll code for every recorded exception block and then
+// promotes branches.
+void Mips64Assembler::FinalizeCode() {
+  for (auto& pending_block : exception_blocks_) {
+    EmitExceptionPoll(&pending_block);
+  }
+  PromoteBranches();
+}
+
+void Mips64Assembler::FinalizeInstructions(const MemoryRegion& region) {
+  EmitBranches();  // Fill in the branch placeholders first.
+  Assembler::FinalizeInstructions(region);
+  PatchCFI();  // Re-emit delayed CFI advance-PC entries with adjusted positions.
+}
+
+// Rebuilds the CFI stream so that every delayed advance-PC entry is emitted with
+// the position reported by GetAdjustedPosition(). Does nothing when no advances
+// were delayed.
+void Mips64Assembler::PatchCFI() {
+  if (cfi().NumberOfDelayedAdvancePCs() == 0u) {
+    return;
+  }
+
+  typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC;
+  const auto released = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();
+  const std::vector<uint8_t>& original_stream = released.first;
+  const std::vector<DelayedAdvancePC>& delayed_advances = released.second;
+
+  // Reserve room for the old opcodes plus the re-inserted advance commands.
+  cfi().ReserveCFIStream(original_stream.size() + delayed_advances.size() + 16);
+  size_t copied_up_to = 0;
+  for (const DelayedAdvancePC& delayed : delayed_advances) {
+    DCHECK_GE(delayed.stream_pos, copied_up_to);
+    // Copy the old opcodes that precede the point where this advance was issued.
+    cfi().AppendRawData(original_stream, copied_up_to, delayed.stream_pos);
+    copied_up_to = delayed.stream_pos;
+    // Emit the advance command with its adjusted position.
+    size_t adjusted_pc = GetAdjustedPosition(delayed.pc);
+    cfi().AdvancePC(adjusted_pc);
+  }
+  // Copy whatever follows the last advance, if anything.
+  cfi().AppendRawData(original_stream, copied_up_to, original_stream.size());
+}
+
+// Emits every recorded branch into its placeholder in the buffer.
+void Mips64Assembler::EmitBranches() {
+  CHECK(!overwriting_);
+  // While overwriting_ is set, Emit() stores into existing buffer positions
+  // (the branch placeholders) instead of appending at the end.
+  overwriting_ = true;
+  for (auto& pending_branch : branches_) {
+    EmitBranch(&pending_branch);
+  }
+  overwriting_ = false;
+}
+
void Mips64Assembler::Emit(uint32_t value) {  // Emits one 32-bit word: in place or appended.
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- buffer_.Emit<uint32_t>(value);
+ if (overwriting_) {
+ // Branches to labels are emitted into their placeholders here.
+ buffer_.Store<uint32_t>(overwrite_location_, value);
+ overwrite_location_ += sizeof(uint32_t);  // Advance to the next word of the placeholder.
+ } else {
+ // Other instructions are simply appended at the end here.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ buffer_.Emit<uint32_t>(value);
+ }
}
void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd,
@@ -82,15 +140,16 @@ void Mips64Assembler::EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t
void Mips64Assembler::EmitI21(int opcode, GpuRegister rs, uint32_t imm21) {  // opcode | rs | imm21.
CHECK_NE(rs, kNoGpuRegister);
+ CHECK(IsUint<21>(imm21)) << imm21;  // The immediate is no longer masked below; it must already fit.
uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
static_cast<uint32_t>(rs) << kRsShift |
- (imm21 & 0x1FFFFF);
+ imm21;
Emit(encoding);
}
-void Mips64Assembler::EmitJ(int opcode, uint32_t addr26) {
- uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
- (addr26 & 0x3FFFFFF);
+void Mips64Assembler::EmitI26(int opcode, uint32_t imm26) {  // Encodes opcode | imm26.
+ CHECK(IsUint<26>(imm26)) << imm26;  // Out-of-range immediates are rejected rather than masked.
+ uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26;
Emit(encoding);
}
@@ -428,26 +487,6 @@ void Mips64Assembler::Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
EmitI(0xb, rs, rt, imm16);
}
-void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
- EmitI(0x4, rs, rt, imm16);
- Nop();
-}
-
-void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
- EmitI(0x5, rs, rt, imm16);
- Nop();
-}
-
-void Mips64Assembler::J(uint32_t addr26) {
- EmitJ(0x2, addr26);
- Nop();
-}
-
-void Mips64Assembler::Jal(uint32_t addr26) {
- EmitJ(0x3, addr26);
- Nop();
-}
-
void Mips64Assembler::Seleqz(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
EmitR(0, rs, rt, rd, 0, 0x35);
}
@@ -474,7 +513,6 @@ void Mips64Assembler::Dclo(GpuRegister rd, GpuRegister rs) {
void Mips64Assembler::Jalr(GpuRegister rd, GpuRegister rs) {
EmitR(0, rs, static_cast<GpuRegister>(0), rd, 0, 0x09);
- Nop();
}
void Mips64Assembler::Jalr(GpuRegister rs) {
@@ -489,6 +527,15 @@ void Mips64Assembler::Auipc(GpuRegister rs, uint16_t imm16) {
EmitI(0x3B, rs, static_cast<GpuRegister>(0x1E), imm16);
}
+void Mips64Assembler::Addiupc(GpuRegister rs, uint32_t imm19) {  // Emitted via EmitI21, opcode 0x3B.
+ CHECK(IsUint<19>(imm19)) << imm19;  // Only a 19-bit immediate is accepted.
+ EmitI21(0x3B, rs, imm19);
+}
+
+void Mips64Assembler::Bc(uint32_t imm26) {  // Raw-immediate form; range-checked by EmitI26.
+ EmitI26(0x32, imm26);
+}
+
void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) {
EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16);
}
@@ -549,14 +596,14 @@ void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
CHECK_NE(rs, ZERO);
CHECK_NE(rt, ZERO);
CHECK_NE(rs, rt);
- EmitI(0x8, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16);
+ EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16);
}
void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
CHECK_NE(rs, ZERO);
CHECK_NE(rt, ZERO);
CHECK_NE(rs, rt);
- EmitI(0x18, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16);
+ EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16);
}
void Mips64Assembler::Beqzc(GpuRegister rs, uint32_t imm21) {
@@ -569,6 +616,65 @@ void Mips64Assembler::Bnezc(GpuRegister rs, uint32_t imm21) {
EmitI21(0x3E, rs, imm21);
}
+void Mips64Assembler::EmitBcondc(BranchCondition cond,  // Emits the compact branch for cond.
+ GpuRegister rs,
+ GpuRegister rt,
+ uint32_t imm16_21) {
+ switch (cond) {
+ case kCondLT:
+ Bltc(rs, rt, imm16_21);
+ break;
+ case kCondGE:
+ Bgec(rs, rt, imm16_21);
+ break;
+ case kCondLE:
+ Bgec(rt, rs, imm16_21);  // LE is emitted as GE with the operands swapped.
+ break;
+ case kCondGT:
+ Bltc(rt, rs, imm16_21);  // GT is emitted as LT with the operands swapped.
+ break;
+ case kCondLTZ:
+ CHECK_EQ(rt, ZERO);  // Compare-with-zero conditions take a single register; rt must be ZERO.
+ Bltzc(rs, imm16_21);
+ break;
+ case kCondGEZ:
+ CHECK_EQ(rt, ZERO);
+ Bgezc(rs, imm16_21);
+ break;
+ case kCondLEZ:
+ CHECK_EQ(rt, ZERO);
+ Blezc(rs, imm16_21);
+ break;
+ case kCondGTZ:
+ CHECK_EQ(rt, ZERO);
+ Bgtzc(rs, imm16_21);
+ break;
+ case kCondEQ:
+ Beqc(rs, rt, imm16_21);
+ break;
+ case kCondNE:
+ Bnec(rs, rt, imm16_21);
+ break;
+ case kCondEQZ:
+ CHECK_EQ(rt, ZERO);
+ Beqzc(rs, imm16_21);
+ break;
+ case kCondNEZ:
+ CHECK_EQ(rt, ZERO);
+ Bnezc(rs, imm16_21);
+ break;
+ case kCondLTU:
+ Bltuc(rs, rt, imm16_21);
+ break;
+ case kCondGEU:
+ Bgeuc(rs, rt, imm16_21);
+ break;
+ case kUncond:  // Not a conditional branch; reaching this is a fatal error.
+ LOG(FATAL) << "Unexpected branch condition " << cond;
+ UNREACHABLE();
+ }
+}
+
void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
EmitFR(0x11, 0x10, ft, fs, fd, 0x0);
}
@@ -925,15 +1031,6 @@ void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) {
}
}
-void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp) {
- if (IsInt<16>(value)) {
- Addiu(rt, rs, value);
- } else {
- LoadConst32(rtmp, value);
- Addu(rt, rs, rtmp);
- }
-}
-
void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) {
if (IsInt<16>(value)) {
Daddiu(rt, rs, value);
@@ -943,177 +1040,621 @@ void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, Gp
}
}
-//
-// MIPS64R6 branches
-//
-//
-// Unconditional (pc + 32-bit signed offset):
-//
-// auipc at, ofs_high
-// jic at, ofs_low
-// // no delay/forbidden slot
-//
-//
-// Conditional (pc + 32-bit signed offset):
-//
-// b<cond>c reg, +2 // skip next 2 instructions
-// auipc at, ofs_high
-// jic at, ofs_low
-// // no delay/forbidden slot
-//
-//
-// Unconditional (pc + 32-bit signed offset) and link:
-//
-// auipc reg, ofs_high
-// daddiu reg, ofs_low
-// jialc reg, 0
-// // no delay/forbidden slot
-//
-//
-// TODO: use shorter instruction sequences whenever possible.
-//
-
-void Mips64Assembler::Bind(Label* label) {
- CHECK(!label->IsBound());
- int32_t bound_pc = buffer_.Size();
+void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBits offset_size,  // Sets type_.
+ Mips64Assembler::Branch::Type short_type,
+ Mips64Assembler::Branch::Type long_type) {
+ type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type;  // Short if it fits.
+}
- // Walk the list of the branches (auipc + jic pairs) referring to and preceding this label.
- // Embed the previously unknown pc-relative addresses in them.
- while (label->IsLinked()) {
- int32_t position = label->Position();
- // Extract the branch (instruction pair)
- uint32_t auipc = buffer_.Load<uint32_t>(position);
- uint32_t jic = buffer_.Load<uint32_t>(position + 4); // actually, jic or daddiu
+void Mips64Assembler::Branch::InitializeType(bool is_call) {  // Picks the initial short/long type.
+ OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
+ if (is_call) {
+ InitShortOrLong(offset_size, kCall, kLongCall);
+ } else if (condition_ == kUncond) {
+ InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
+ } else {
+ if (condition_ == kCondEQZ || condition_ == kCondNEZ) {
+ // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
+ type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch;
+ } else {
+ InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+ }
+ }
+ old_type_ = type_;  // Remember the initial type; GetOldLength() reads it.
+}
+
+bool Mips64Assembler::Branch::IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs) {  // Never taken?
+ switch (condition) {
+ case kCondLT:
+ case kCondGT:
+ case kCondNE:
+ case kCondLTU:
+ return lhs == rhs;  // A register is never <, > or != itself.
+ default:
+ return false;
+ }
+}
+
+bool Mips64Assembler::Branch::IsUncond(BranchCondition condition,  // Is the branch always taken?
+ GpuRegister lhs,
+ GpuRegister rhs) {
+ switch (condition) {
+ case kUncond:
+ return true;
+ case kCondGE:
+ case kCondLE:
+ case kCondEQ:
+ case kCondGEU:
+ return lhs == rhs;  // A register is always >=, <= and == itself.
+ default:
+ return false;
+ }
+}
+
+Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target)  // Unconditional branch.
+ : old_location_(location),
+ location_(location),
+ target_(target),
+ lhs_reg_(ZERO),
+ rhs_reg_(ZERO),
+ condition_(kUncond) {
+ InitializeType(false);  // false: not a call.
+}
+
+Mips64Assembler::Branch::Branch(uint32_t location,  // Conditional branch.
+ uint32_t target,
+ Mips64Assembler::BranchCondition condition,
+ GpuRegister lhs_reg,
+ GpuRegister rhs_reg)
+ : old_location_(location),
+ location_(location),
+ target_(target),
+ lhs_reg_(lhs_reg),
+ rhs_reg_(rhs_reg),
+ condition_(condition) {
+ CHECK_NE(condition, kUncond);
+ switch (condition) {
+ case kCondEQ:
+ case kCondNE:
+ case kCondLT:
+ case kCondGE:
+ case kCondLE:
+ case kCondGT:
+ case kCondLTU:
+ case kCondGEU:
+ CHECK_NE(lhs_reg, ZERO);  // Two-register conditions: neither operand may be ZERO.
+ CHECK_NE(rhs_reg, ZERO);
+ break;
+ case kCondLTZ:
+ case kCondGEZ:
+ case kCondLEZ:
+ case kCondGTZ:
+ case kCondEQZ:
+ case kCondNEZ:
+ CHECK_NE(lhs_reg, ZERO);  // Compare-with-zero conditions: lhs is the tested register, rhs is ZERO.
+ CHECK_EQ(rhs_reg, ZERO);
+ break;
+ case kUncond:
+ UNREACHABLE();
+ }
+ CHECK(!IsNop(condition, lhs_reg, rhs_reg));  // A never-taken branch must not be constructed.
+ if (IsUncond(condition, lhs_reg, rhs_reg)) {
+ // Branch condition is always true, make the branch unconditional.
+ condition_ = kUncond;
+ }
+ InitializeType(false);  // false: not a call.
+}
+
+Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg)  // Call.
+ : old_location_(location),
+ location_(location),
+ target_(target),
+ lhs_reg_(indirect_reg),
+ rhs_reg_(ZERO),
+ condition_(kUncond) {
+ CHECK_NE(indirect_reg, ZERO);  // The indirect register may be neither ZERO nor AT.
+ CHECK_NE(indirect_reg, AT);
+ InitializeType(true);  // true: this is a call.
+}
+
+Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition(  // Returns the negated condition.
+ Mips64Assembler::BranchCondition cond) {
+ switch (cond) {
+ case kCondLT:
+ return kCondGE;
+ case kCondGE:
+ return kCondLT;
+ case kCondLE:
+ return kCondGT;
+ case kCondGT:
+ return kCondLE;
+ case kCondLTZ:
+ return kCondGEZ;
+ case kCondGEZ:
+ return kCondLTZ;
+ case kCondLEZ:
+ return kCondGTZ;
+ case kCondGTZ:
+ return kCondLEZ;
+ case kCondEQ:
+ return kCondNE;
+ case kCondNE:
+ return kCondEQ;
+ case kCondEQZ:
+ return kCondNEZ;
+ case kCondNEZ:
+ return kCondEQZ;
+ case kCondLTU:
+ return kCondGEU;
+ case kCondGEU:
+ return kCondLTU;
+ case kUncond:  // An unconditional branch has no opposite; this is a fatal error.
+ LOG(FATAL) << "Unexpected branch condition " << cond;
+ }
+ UNREACHABLE();
+}
+
+Mips64Assembler::Branch::Type Mips64Assembler::Branch::GetType() const {  // Current (possibly promoted) type.
+ return type_;
+}
+
+Mips64Assembler::BranchCondition Mips64Assembler::Branch::GetCondition() const {  // kUncond if always taken.
+ return condition_;
+}
+
+GpuRegister Mips64Assembler::Branch::GetLeftRegister() const {  // For calls, this is the indirect register.
+ return lhs_reg_;
+}
+
+GpuRegister Mips64Assembler::Branch::GetRightRegister() const {
+ return rhs_reg_;
+}
+
+uint32_t Mips64Assembler::Branch::GetTarget() const {  // kUnresolved until Resolve() is called.
+ return target_;
+}
- // Extract the location of the previous pair in the list (walking the list backwards;
- // the previous pair location was stored in the immediate operands of the instructions)
- int32_t prev = (auipc << 16) | (jic & 0xFFFF);
+uint32_t Mips64Assembler::Branch::GetLocation() const {  // Current location; Relocate() may move it.
+ return location_;
+}
+
+uint32_t Mips64Assembler::Branch::GetOldLocation() const {  // Location recorded at construction.
+ return old_location_;
+}
+
+uint32_t Mips64Assembler::Branch::GetLength() const {  // Length in 32-bit words for the current type.
+ return branch_info_[type_].length;
+}
+
+uint32_t Mips64Assembler::Branch::GetOldLength() const {  // Length in 32-bit words for the initial type.
+ return branch_info_[old_type_].length;
+}
+
+uint32_t Mips64Assembler::Branch::GetSize() const {  // Current size in bytes.
+ return GetLength() * sizeof(uint32_t);
+}
+
+uint32_t Mips64Assembler::Branch::GetOldSize() const {  // Initial size in bytes.
+ return GetOldLength() * sizeof(uint32_t);
+}
+
+uint32_t Mips64Assembler::Branch::GetEndLocation() const {  // First byte past the branch.
+ return GetLocation() + GetSize();
+}
+
+uint32_t Mips64Assembler::Branch::GetOldEndLocation() const {  // Same, using the initial location and size.
+ return GetOldLocation() + GetOldSize();
+}
+
+bool Mips64Assembler::Branch::IsLong() const {  // True for the kLong* branch types.
+ switch (type_) {
+ // Short branches.
+ case kUncondBranch:
+ case kCondBranch:
+ case kCall:
+ return false;
+ // Long branches.
+ case kLongUncondBranch:
+ case kLongCondBranch:
+ case kLongCall:
+ return true;
+ }
+ UNREACHABLE();
+}
+
+bool Mips64Assembler::Branch::IsResolved() const {  // True once the target is known.
+ return target_ != kUnresolved;
+}
+
+Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSize() const {  // Offset width in use.
+ OffsetBits offset_size =
+ (type_ == kCondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
+ ? kOffset23  // Short EQZ/NEZ branches override the table value for kCondBranch.
+ : branch_info_[type_].offset_size;
+ return offset_size;
+}
+
+Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSizeNeeded(uint32_t location,  // Min width.
+ uint32_t target) {
+ // For unresolved targets assume the shortest encoding
+ // (later it will be made longer if needed).
+ if (target == kUnresolved)
+ return kOffset16;
+ int64_t distance = static_cast<int64_t>(target) - location;  // Signed: negative for backward branches.
+ // To simplify calculations in composite branches consisting of multiple instructions
+ // bump up the distance by a value larger than the max byte size of a composite branch.
+ distance += (distance >= 0) ? kMaxBranchSize : -kMaxBranchSize;
+ if (IsInt<kOffset16>(distance))  // Try each width from narrowest to widest.
+ return kOffset16;
+ else if (IsInt<kOffset18>(distance))
+ return kOffset18;
+ else if (IsInt<kOffset21>(distance))
+ return kOffset21;
+ else if (IsInt<kOffset23>(distance))
+ return kOffset23;
+ else if (IsInt<kOffset28>(distance))
+ return kOffset28;
+ return kOffset32;
+}
+
+void Mips64Assembler::Branch::Resolve(uint32_t target) {  // Records the now-known target.
+ target_ = target;
+}
+
+void Mips64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) {  // Shift after an expansion.
+ if (location_ > expand_location) {  // Strictly after: the expanding branch itself does not move.
+ location_ += delta;
+ }
+ if (!IsResolved()) {
+ return; // Don't know the target yet.
+ }
+ if (target_ > expand_location) {
+ target_ += delta;
+ }
+}
+
+void Mips64Assembler::Branch::PromoteToLong() {  // Switches a short type to its long counterpart.
+ switch (type_) {
+ // Short branches.
+ case kUncondBranch:
+ type_ = kLongUncondBranch;
+ break;
+ case kCondBranch:
+ type_ = kLongCondBranch;
+ break;
+ case kCall:
+ type_ = kLongCall;
+ break;
+ default:
+ // Note: 'type_' is already long.
+ break;
+ }
+ CHECK(IsLong());
+}
+
+uint32_t Mips64Assembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) {  // Returns growth in bytes, or 0.
+ // If the branch is still unresolved or already long, nothing to do.
+ if (IsLong() || !IsResolved()) {
+ return 0;
+ }
+ // Promote the short branch to long if the offset size is too small
+ // to hold the distance between location_ and target_.
+ if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) {
+ PromoteToLong();
+ uint32_t old_size = GetOldSize();
+ uint32_t new_size = GetSize();
+ CHECK_GT(new_size, old_size);
+ return new_size - old_size;
+ }
+ // The following logic is for debugging/testing purposes.
+ // Promote some short branches to long when it's not really required.
+ if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {  // max() disables this path.
+ int64_t distance = static_cast<int64_t>(target_) - location_;
+ distance = (distance >= 0) ? distance : -distance;  // Absolute distance.
+ if (distance >= max_short_distance) {
+ PromoteToLong();
+ uint32_t old_size = GetOldSize();
+ uint32_t new_size = GetSize();
+ CHECK_GT(new_size, old_size);
+ return new_size - old_size;
+ }
+ }
+ return 0;
+}
+
+uint32_t Mips64Assembler::Branch::GetOffsetLocation() const {  // location_ plus instr_offset words.
+ return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t);
+}
+
+uint32_t Mips64Assembler::Branch::GetOffset() const {  // Offset value ready to be encoded.
+ CHECK(IsResolved());
+ uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize());  // Low GetOffsetSize() bits set.
+ // Calculate the byte distance between instructions and also account for
+ // different PC-relative origins.
+ uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t);
+ // Prepare the offset for encoding into the instruction(s).
+ offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift;
+ return offset;
+}
- // Get the pc-relative address
- uint32_t offset = bound_pc - position;
- offset += (offset & 0x8000) << 1; // account for sign extension in jic/daddiu
+Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) {  // branch_id indexes branches_.
+ CHECK_LT(branch_id, branches_.size());
+ return &branches_[branch_id];
+}
+
+const Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) const {  // Const overload.
+ CHECK_LT(branch_id, branches_.size());
+ return &branches_[branch_id];
+}
+
+void Mips64Assembler::Bind(Mips64Label* label) {  // Binds label at the current end of the buffer.
+ CHECK(!label->IsBound());
+ uint32_t bound_pc = buffer_.Size();
- // Embed it in the two instructions
- auipc = (auipc & 0xFFFF0000) | (offset >> 16);
- jic = (jic & 0xFFFF0000) | (offset & 0xFFFF);
+ // Walk the list of branches referring to and preceding this label.
+ // Store the previously unknown target addresses in them.
+ while (label->IsLinked()) {
+ uint32_t branch_id = label->Position();  // While linked, the label holds a branch ID.
+ Branch* branch = GetBranch(branch_id);
+ branch->Resolve(bound_pc);
- // Save the adjusted instructions
- buffer_.Store<uint32_t>(position, auipc);
- buffer_.Store<uint32_t>(position + 4, jic);
+ uint32_t branch_location = branch->GetLocation();
+ // Extract the location of the previous branch in the list (walking the list backwards;
+ // the previous branch ID was stored in the space reserved for this branch).
+ uint32_t prev = buffer_.Load<uint32_t>(branch_location);
 // On to the previous branch in the list...
 label->position_ = prev;
 }
- // Now make the label object contain its own location
- // (it will be used by the branches referring to and following this label)
+ // Now make the label object contain its own location (relative to the end of the preceding
+ // branch, if any; it will be used by the branches referring to and following this label).
+ label->prev_branch_id_plus_one_ = branches_.size();  // 0 means no branch precedes the label.
+ if (label->prev_branch_id_plus_one_) {
+ uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
+ const Branch* branch = GetBranch(branch_id);
+ bound_pc -= branch->GetEndLocation();  // GetLabelLocation() adds this back.
+ }
 label->BindTo(bound_pc);
}
-void Mips64Assembler::B(Label* label) {
- if (label->IsBound()) {
- // Branch backwards (to a preceding label), distance is known
- uint32_t offset = label->Position() - buffer_.Size();
- CHECK_LE(static_cast<int32_t>(offset), 0);
- offset += (offset & 0x8000) << 1; // account for sign extension in jic
- Auipc(AT, offset >> 16);
- Jic(AT, offset);
- } else {
- // Branch forward (to a following label), distance is unknown
- int32_t position = buffer_.Size();
- // The first branch forward will have 0 in its pc-relative address (copied from label's
- // position). It will be the terminator of the list of forward-reaching branches.
- uint32_t prev = label->position_;
- Auipc(AT, prev >> 16);
- Jic(AT, prev);
- // Now make the link object point to the location of this branch
- // (this forms a linked list of branches preceding this label)
- label->LinkTo(position);
+uint32_t Mips64Assembler::GetLabelLocation(Mips64Label* label) const {  // Buffer position of a bound label.
+ CHECK(label->IsBound());
+ uint32_t target = label->Position();  // Relative to the preceding branch's end, if there is one.
+ if (label->prev_branch_id_plus_one_) {
+ // Get label location based on the branch preceding it.
+ uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
+ const Branch* branch = GetBranch(branch_id);
+ target += branch->GetEndLocation();
+ }
+ return target;
+}
+
+uint32_t Mips64Assembler::GetAdjustedPosition(uint32_t old_position) {  // Position after branch resizing.
+ // We can reconstruct the adjustment by going through all the branches from the beginning
+ // up to the old_position. Since we expect GetAdjustedPosition() to be called in a loop
+ // with increasing old_position, we can use the data from the last GetAdjustedPosition() call
+ // to continue where we left off and the whole loop should be O(m+n) where m is the number
+ // of positions to adjust and n is the number of branches.
+ if (old_position < last_old_position_) {  // Positions went backwards: restart from the first branch.
+ last_position_adjustment_ = 0;
+ last_old_position_ = 0;
+ last_branch_id_ = 0;
+ }
+ while (last_branch_id_ != branches_.size()) {
+ const Branch* branch = GetBranch(last_branch_id_);
+ if (branch->GetLocation() >= old_position + last_position_adjustment_) {
+ break;  // This branch is at or past the adjusted position; stop accumulating.
+ }
+ last_position_adjustment_ += branch->GetSize() - branch->GetOldSize();  // Growth of this branch.
+ ++last_branch_id_;
+ }
+ last_old_position_ = old_position;
+ return old_position + last_position_adjustment_;
+}
+
+void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) {  // Reserves space for branches_.back().
+ uint32_t length = branches_.back().GetLength();
+ if (!label->IsBound()) {
+ // Branch forward (to a following label), distance is unknown.
+ // The first branch forward will contain 0, serving as the terminator of
+ // the list of forward-reaching branches.
+ Emit(label->position_);  // The first placeholder word holds the link to the previous branch.
+ length--;  // That word counts toward the reserved length.
+ // Now make the label object point to this branch
+ // (this forms a linked list of branches preceding this label).
+ uint32_t branch_id = branches_.size() - 1;
+ label->LinkTo(branch_id);
+ }
+ // Reserve space for the branch.
+ while (length--) {
+ Nop();
 }
}
-void Mips64Assembler::Jalr(Label* label, GpuRegister indirect_reg) {
- if (label->IsBound()) {
- // Branch backwards (to a preceding label), distance is known
- uint32_t offset = label->Position() - buffer_.Size();
- CHECK_LE(static_cast<int32_t>(offset), 0);
- offset += (offset & 0x8000) << 1; // account for sign extension in daddiu
- Auipc(indirect_reg, offset >> 16);
- Daddiu(indirect_reg, indirect_reg, offset);
- Jialc(indirect_reg, 0);
- } else {
- // Branch forward (to a following label), distance is unknown
- int32_t position = buffer_.Size();
- // The first branch forward will have 0 in its pc-relative address (copied from label's
- // position). It will be the terminator of the list of forward-reaching branches.
- uint32_t prev = label->position_;
- Auipc(indirect_reg, prev >> 16);
- Daddiu(indirect_reg, indirect_reg, prev);
- Jialc(indirect_reg, 0);
- // Now make the link object point to the location of this branch
- // (this forms a linked list of branches preceding this label)
- label->LinkTo(position);
+void Mips64Assembler::Buncond(Mips64Label* label) {  // Records an unconditional branch to label.
+ uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+ branches_.emplace_back(buffer_.Size(), target);  // The branch starts at the current end of the buffer.
+ FinalizeLabeledBranch(label);
+}
+
+void Mips64Assembler::Bcond(Mips64Label* label,  // Records a conditional branch to label.
+ BranchCondition condition,
+ GpuRegister lhs,
+ GpuRegister rhs) {
+ // If lhs = rhs, this can be a NOP.
+ if (Branch::IsNop(condition, lhs, rhs)) {
+ return;  // Nothing is emitted or recorded for a never-taken branch.
+ }
+ uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+ branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs);
+ FinalizeLabeledBranch(label);
+}
+
+void Mips64Assembler::Call(Mips64Label* label, GpuRegister indirect_reg) {  // Records a call to label.
+ uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+ branches_.emplace_back(buffer_.Size(), target, indirect_reg);
+ FinalizeLabeledBranch(label);
+}
+
+void Mips64Assembler::PromoteBranches() {  // Sizes all branches, then makes room for them in the buffer.
+ // Promote short branches to long as necessary.
+ bool changed;
+ do {  // Repeat until a full pass promotes nothing; one promotion can push others out of range.
+ changed = false;
+ for (auto& branch : branches_) {
+ CHECK(branch.IsResolved());  // Every label must have been bound by now.
+ uint32_t delta = branch.PromoteIfNeeded();
+ // If this branch has been promoted and needs to expand in size,
+ // relocate all branches by the expansion size.
+ if (delta) {
+ changed = true;
+ uint32_t expand_location = branch.GetLocation();
+ for (auto& branch2 : branches_) {
+ branch2.Relocate(expand_location, delta);
+ }
+ }
+ }
+ } while (changed);
+
+ // Account for branch expansion by resizing the code buffer
+ // and moving the code in it to its final location.
+ size_t branch_count = branches_.size();
+ if (branch_count > 0) {
+ // Resize.
+ Branch& last_branch = branches_[branch_count - 1];
+ uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation();  // Total growth.
+ uint32_t old_size = buffer_.Size();
+ buffer_.Resize(old_size + size_delta);
+ // Move the code residing between branch placeholders.
+ uint32_t end = old_size;
+ for (size_t i = branch_count; i > 0; ) {  // Last to first; presumably so moves never clobber unmoved code.
+ Branch& branch = branches_[--i];
+ uint32_t size = end - branch.GetOldEndLocation();  // Bytes of code following this branch.
+ buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size);
+ end = branch.GetOldLocation();
+ }
+ }
+}
+
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
+// NOTE(review): fields appear to be {length, instr_offset, pc_org, offset_size, offset_shift};
+// confirm against the BranchInfo declaration in the header.
+const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[] = {
+ // Short branches.
+ { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kUncondBranch
+ { 2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kCondBranch
+ // Exception: kOffset23 for beqzc/bnezc
+ { 2, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kCall
+ // Long branches.
+ { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongUncondBranch
+ { 3, 1, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCondBranch
+ { 3, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCall
+};
+
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
+void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) {  // Writes one branch into its slot.
+ CHECK(overwriting_);  // Only valid in overwrite mode (see EmitBranches()).
+ overwrite_location_ = branch->GetLocation();  // Emit() stores at and advances this location.
+ uint32_t offset = branch->GetOffset();
+ BranchCondition condition = branch->GetCondition();
+ GpuRegister lhs = branch->GetLeftRegister();
+ GpuRegister rhs = branch->GetRightRegister();
+ switch (branch->GetType()) {
+ // Short branches.
+ case Branch::kUncondBranch:
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Bc(offset);
+ break;
+ case Branch::kCondBranch:
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ EmitBcondc(condition, lhs, rhs, offset);
+ Nop(); // TODO: improve by filling the forbidden slot.
+ break;
+ case Branch::kCall:
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Addiupc(lhs, offset);
+ Jialc(lhs, 0);
+ break;
+
+ // Long branches.
+ case Branch::kLongUncondBranch:
+ offset += (offset & 0x8000) << 1; // Account for sign extension in jic.
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Auipc(AT, High16Bits(offset));
+ Jic(AT, Low16Bits(offset));
+ break;
+ case Branch::kLongCondBranch:
+ EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2);  // Inverted test skips the auipc/jic pair.
+ offset += (offset & 0x8000) << 1; // Account for sign extension in jic.
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Auipc(AT, High16Bits(offset));
+ Jic(AT, Low16Bits(offset));
+ break;
+ case Branch::kLongCall:
+ offset += (offset & 0x8000) << 1; // Account for sign extension in daddiu.
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Auipc(lhs, High16Bits(offset));
+ Daddiu(lhs, lhs, Low16Bits(offset));
+ Jialc(lhs, 0);
+ break;
 }
+ CHECK_EQ(overwrite_location_, branch->GetEndLocation());  // Exactly the reserved words were written.
+ CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize));
}
-void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Label* label) {
- Bgec(rs, rt, 2);
- B(label);
+void Mips64Assembler::Bc(Mips64Label* label) {  // Label forms below only record a branch via Buncond/Bcond/Call.
+ Buncond(label);
}
-void Mips64Assembler::Bltzc(GpuRegister rt, Label* label) {
- Bgezc(rt, 2);
- B(label);
+void Mips64Assembler::Jialc(Mips64Label* label, GpuRegister indirect_reg) {
+ Call(label, indirect_reg);
}
-void Mips64Assembler::Bgtzc(GpuRegister rt, Label* label) {
- Blezc(rt, 2);
- B(label);
+void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+ Bcond(label, kCondLT, rs, rt);
}
-void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Label* label) {
- Bltc(rs, rt, 2);
- B(label);
+void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label) {
+ Bcond(label, kCondLTZ, rt);  // Single-register form: no rhs argument is passed to Bcond.
}
-void Mips64Assembler::Bgezc(GpuRegister rt, Label* label) {
- Bltzc(rt, 2);
- B(label);
+void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label) {
+ Bcond(label, kCondGTZ, rt);
}
-void Mips64Assembler::Blezc(GpuRegister rt, Label* label) {
- Bgtzc(rt, 2);
- B(label);
+void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+ Bcond(label, kCondGE, rs, rt);
}
-void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Label* label) {
- Bgeuc(rs, rt, 2);
- B(label);
+void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label) {
+ Bcond(label, kCondGEZ, rt);
}
-void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Label* label) {
- Bltuc(rs, rt, 2);
- B(label);
+void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label) {
+ Bcond(label, kCondLEZ, rt);
}
-void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Label* label) {
- Bnec(rs, rt, 2);
- B(label);
+void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+ Bcond(label, kCondLTU, rs, rt);
}
-void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Label* label) {
- Beqc(rs, rt, 2);
- B(label);
+void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+ Bcond(label, kCondGEU, rs, rt);
}
-void Mips64Assembler::Beqzc(GpuRegister rs, Label* label) {
- Bnezc(rs, 2);
- B(label);
+void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+ Bcond(label, kCondEQ, rs, rt);
}
-void Mips64Assembler::Bnezc(GpuRegister rs, Label* label) {
- Beqzc(rs, 2);
- B(label);
+void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+ Bcond(label, kCondNE, rs, rt);
+}
+
+void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label) {
+ Bcond(label, kCondEQZ, rs);
+}
+
+void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) {
+ Bcond(label, kCondNEZ, rs);
}
void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
@@ -1256,6 +1797,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
const std::vector<ManagedRegister>& callee_save_regs,
const ManagedRegisterEntrySpills& entry_spills) {
CHECK_ALIGNED(frame_size, kStackAlignment);
+ DCHECK(!overwriting_);
// Increase frame to required size.
IncreaseFrameSize(frame_size);
@@ -1298,6 +1840,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
void Mips64Assembler::RemoveFrame(size_t frame_size,
const std::vector<ManagedRegister>& callee_save_regs) {
CHECK_ALIGNED(frame_size, kStackAlignment);
+ DCHECK(!overwriting_);
cfi_.RememberState();
// Pop callee saves and return address
@@ -1316,6 +1859,7 @@ void Mips64Assembler::RemoveFrame(size_t frame_size,
// Then jump to the return address.
Jr(RA);
+ Nop();
// The CFI should be restored for any code that follows the exit block.
cfi_.RestoreState();
@@ -1324,12 +1868,14 @@ void Mips64Assembler::RemoveFrame(size_t frame_size,
void Mips64Assembler::IncreaseFrameSize(size_t adjust) {
CHECK_ALIGNED(adjust, kFramePointerSize);
+ DCHECK(!overwriting_);
Daddiu64(SP, SP, static_cast<int32_t>(-adjust));
cfi_.AdjustCFAOffset(adjust);
}
void Mips64Assembler::DecreaseFrameSize(size_t adjust) {
CHECK_ALIGNED(adjust, kFramePointerSize);
+ DCHECK(!overwriting_);
Daddiu64(SP, SP, static_cast<int32_t>(adjust));
cfi_.AdjustCFAOffset(-adjust);
}
@@ -1379,17 +1925,7 @@ void Mips64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
}
-void Mips64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
- ManagedRegister mscratch) {
- Mips64ManagedRegister scratch = mscratch.AsMips64();
- CHECK(scratch.IsGpuRegister()) << scratch;
- // TODO: it's unclear wether 32 or 64 bits need to be stored (Arm64 and x86/x64 disagree?).
- // Is this function even referenced anywhere else in the code?
- LoadConst32(scratch.AsGpuRegister(), imm);
- StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value());
-}
-
-void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs,
+void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs,
FrameOffset fr_offs,
ManagedRegister mscratch) {
Mips64ManagedRegister scratch = mscratch.AsMips64();
@@ -1398,7 +1934,7 @@ void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs,
StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
}
-void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) {
+void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) {
StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value());
}
@@ -1415,7 +1951,9 @@ void Mips64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size)
return EmitLoad(mdest, SP, src.Int32Value(), size);
}
-void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) {
+void Mips64Assembler::LoadFromThread64(ManagedRegister mdest,
+ ThreadOffset<kMipsDoublewordSize> src,
+ size_t size) {
return EmitLoad(mdest, S1, src.Int32Value(), size);
}
@@ -1449,18 +1987,20 @@ void Mips64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
}
void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest,
- ThreadOffset<8> offs) {
+ ThreadOffset<kMipsDoublewordSize> offs) {
Mips64ManagedRegister dest = mdest.AsMips64();
CHECK(dest.IsGpuRegister());
LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value());
}
-void Mips64Assembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
- UNIMPLEMENTED(FATAL) << "no sign extension necessary for mips";
+void Mips64Assembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
+ size_t size ATTRIBUTE_UNUSED) {
+ UNIMPLEMENTED(FATAL) << "No sign extension necessary for MIPS64";
}
-void Mips64Assembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
- UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips";
+void Mips64Assembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
+ size_t size ATTRIBUTE_UNUSED) {
+ UNIMPLEMENTED(FATAL) << "No zero extension necessary for MIPS64";
}
void Mips64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
@@ -1492,7 +2032,7 @@ void Mips64Assembler::CopyRef(FrameOffset dest, FrameOffset src,
}
void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
- ThreadOffset<8> thr_offs,
+ ThreadOffset<kMipsDoublewordSize> thr_offs,
ManagedRegister mscratch) {
Mips64ManagedRegister scratch = mscratch.AsMips64();
CHECK(scratch.IsGpuRegister()) << scratch;
@@ -1500,7 +2040,7 @@ void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
}
-void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs,
+void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs,
FrameOffset fr_offs,
ManagedRegister mscratch) {
Mips64ManagedRegister scratch = mscratch.AsMips64();
@@ -1561,9 +2101,12 @@ void Mips64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameO
}
}
-void Mips64Assembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/,
- ManagedRegister /*mscratch*/, size_t /*size*/) {
- UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
+ FrameOffset src_base ATTRIBUTE_UNUSED,
+ Offset src_offset ATTRIBUTE_UNUSED,
+ ManagedRegister mscratch ATTRIBUTE_UNUSED,
+ size_t size ATTRIBUTE_UNUSED) {
+ UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
}
void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset,
@@ -1584,15 +2127,18 @@ void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset,
}
}
-void Mips64Assembler::Copy(FrameOffset /*dest*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset
-/*src_offset*/,
- ManagedRegister /*mscratch*/, size_t /*size*/) {
- UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
+ Offset dest_offset ATTRIBUTE_UNUSED,
+ FrameOffset src ATTRIBUTE_UNUSED,
+ Offset src_offset ATTRIBUTE_UNUSED,
+ ManagedRegister mscratch ATTRIBUTE_UNUSED,
+ size_t size ATTRIBUTE_UNUSED) {
+ UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
}
-void Mips64Assembler::MemoryBarrier(ManagedRegister) {
+void Mips64Assembler::MemoryBarrier(ManagedRegister mreg ATTRIBUTE_UNUSED) {
// TODO: sync?
- UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+ UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
}
void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
@@ -1604,7 +2150,7 @@ void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg;
CHECK(out_reg.IsGpuRegister()) << out_reg;
if (null_allowed) {
- Label null_arg;
+ Mips64Label null_arg;
// Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is
// the address in the handle scope holding the reference.
// e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
@@ -1631,7 +2177,7 @@ void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off,
Mips64ManagedRegister scratch = mscratch.AsMips64();
CHECK(scratch.IsGpuRegister()) << scratch;
if (null_allowed) {
- Label null_arg;
+ Mips64Label null_arg;
LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP,
handle_scope_offset.Int32Value());
// Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is
@@ -1653,7 +2199,7 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
Mips64ManagedRegister in_reg = min_reg.AsMips64();
CHECK(out_reg.IsGpuRegister()) << out_reg;
CHECK(in_reg.IsGpuRegister()) << in_reg;
- Label null_arg;
+ Mips64Label null_arg;
if (!out_reg.Equals(in_reg)) {
LoadConst32(out_reg.AsGpuRegister(), 0);
}
@@ -1663,11 +2209,13 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
Bind(&null_arg);
}
-void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+void Mips64Assembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED,
+ bool could_be_null ATTRIBUTE_UNUSED) {
// TODO: not validating references
}
-void Mips64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+void Mips64Assembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED,
+ bool could_be_null ATTRIBUTE_UNUSED) {
// TODO: not validating references
}
@@ -1679,6 +2227,7 @@ void Mips64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister
LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
base.AsGpuRegister(), offset.Int32Value());
Jalr(scratch.AsGpuRegister());
+ Nop();
// TODO: place reference map on call
}
@@ -1691,11 +2240,13 @@ void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscr
LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
scratch.AsGpuRegister(), offset.Int32Value());
Jalr(scratch.AsGpuRegister());
+ Nop();
// TODO: place reference map on call
}
-void Mips64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*mscratch*/) {
- UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+void Mips64Assembler::CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset ATTRIBUTE_UNUSED,
+ ManagedRegister mscratch ATTRIBUTE_UNUSED) {
+ UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
}
void Mips64Assembler::GetCurrentThread(ManagedRegister tr) {
@@ -1703,37 +2254,39 @@ void Mips64Assembler::GetCurrentThread(ManagedRegister tr) {
}
void Mips64Assembler::GetCurrentThread(FrameOffset offset,
- ManagedRegister /*mscratch*/) {
+ ManagedRegister mscratch ATTRIBUTE_UNUSED) {
StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value());
}
void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
Mips64ManagedRegister scratch = mscratch.AsMips64();
- Mips64ExceptionSlowPath* slow = new Mips64ExceptionSlowPath(scratch, stack_adjust);
- buffer_.EnqueueSlowPath(slow);
- LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
- S1, Thread::ExceptionOffset<8>().Int32Value());
- Bnezc(scratch.AsGpuRegister(), slow->Entry());
-}
-
-void Mips64ExceptionSlowPath::Emit(Assembler* sasm) {
- Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm);
-#define __ sp_asm->
- __ Bind(&entry_);
- if (stack_adjust_ != 0) { // Fix up the frame.
- __ DecreaseFrameSize(stack_adjust_);
+ exception_blocks_.emplace_back(scratch, stack_adjust);
+ LoadFromOffset(kLoadDoubleword,
+ scratch.AsGpuRegister(),
+ S1,
+ Thread::ExceptionOffset<kMipsDoublewordSize>().Int32Value());
+ Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry());
+}
+
+void Mips64Assembler::EmitExceptionPoll(Mips64ExceptionSlowPath* exception) {
+ Bind(exception->Entry());
+ if (exception->stack_adjust_ != 0) { // Fix up the frame.
+ DecreaseFrameSize(exception->stack_adjust_);
}
- // Pass exception object as argument
- // Don't care about preserving A0 as this call won't return
- __ Move(A0, scratch_.AsGpuRegister());
+ // Pass exception object as argument.
+ // Don't care about preserving A0 as this call won't return.
+ CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+ Move(A0, exception->scratch_.AsGpuRegister());
// Set up call to Thread::Current()->pDeliverException
- __ LoadFromOffset(kLoadDoubleword, T9, S1,
- QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value());
- // TODO: check T9 usage
- __ Jr(T9);
+ LoadFromOffset(kLoadDoubleword,
+ T9,
+ S1,
+ QUICK_ENTRYPOINT_OFFSET(kMipsDoublewordSize, pDeliverException).Int32Value());
+ Jr(T9);
+ Nop();
+
// Call never returns
- __ Break();
-#undef __
+ Break();
}
} // namespace mips64
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 42962bca20..57fc19a6e9 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -17,18 +17,22 @@
#ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
#define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
+#include <utility>
#include <vector>
#include "base/macros.h"
#include "constants_mips64.h"
#include "globals.h"
#include "managed_register_mips64.h"
-#include "utils/assembler.h"
#include "offsets.h"
+#include "utils/assembler.h"
+#include "utils/label.h"
namespace art {
namespace mips64 {
+static constexpr size_t kMipsDoublewordSize = 8;
+
enum LoadOperandType {
kLoadSignedByte,
kLoadUnsignedByte,
@@ -60,10 +64,57 @@ enum FPClassMaskType {
kPositiveZero = 0x200,
};
+class Mips64Label : public Label {
+ public:
+ Mips64Label() : prev_branch_id_plus_one_(0) {}
+
+ Mips64Label(Mips64Label&& src)
+ : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {}
+
+ private:
+ uint32_t prev_branch_id_plus_one_; // To get distance from preceding branch, if any.
+
+ friend class Mips64Assembler;
+ DISALLOW_COPY_AND_ASSIGN(Mips64Label);
+};
+
+// Slow path entered when Thread::Current()->exception_ is non-null.
+class Mips64ExceptionSlowPath {
+ public:
+ explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust)
+ : scratch_(scratch), stack_adjust_(stack_adjust) {}
+
+ Mips64ExceptionSlowPath(Mips64ExceptionSlowPath&& src)
+ : scratch_(src.scratch_),
+ stack_adjust_(src.stack_adjust_),
+ exception_entry_(std::move(src.exception_entry_)) {}
+
+ private:
+ Mips64Label* Entry() { return &exception_entry_; }
+ const Mips64ManagedRegister scratch_;
+ const size_t stack_adjust_;
+ Mips64Label exception_entry_;
+
+ friend class Mips64Assembler;
+ DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath);
+};
+
class Mips64Assembler FINAL : public Assembler {
public:
- Mips64Assembler() {}
- virtual ~Mips64Assembler() {}
+ Mips64Assembler()
+ : overwriting_(false),
+ overwrite_location_(0),
+ last_position_adjustment_(0),
+ last_old_position_(0),
+ last_branch_id_(0) {
+ cfi().DelayEmittingAdvancePCs();
+ }
+
+ virtual ~Mips64Assembler() {
+ for (auto& branch : branches_) {
+ CHECK(branch.IsResolved());
+ }
+ }
// Emit Machine Instructions.
void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
@@ -156,14 +207,12 @@ class Mips64Assembler FINAL : public Assembler {
void Dclz(GpuRegister rd, GpuRegister rs);
void Dclo(GpuRegister rd, GpuRegister rs);
- void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16);
- void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16);
- void J(uint32_t addr26);
- void Jal(uint32_t addr26);
void Jalr(GpuRegister rd, GpuRegister rs);
void Jalr(GpuRegister rs);
void Jr(GpuRegister rs);
void Auipc(GpuRegister rs, uint16_t imm16);
+ void Addiupc(GpuRegister rs, uint32_t imm19);
+ void Bc(uint32_t imm26);
void Jic(GpuRegister rt, uint16_t imm16);
void Jialc(GpuRegister rt, uint16_t imm16);
void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
@@ -240,32 +289,34 @@ class Mips64Assembler FINAL : public Assembler {
void Clear(GpuRegister rd);
void Not(GpuRegister rd, GpuRegister rs);
- // Higher level composite instructions
+ // Higher level composite instructions.
void LoadConst32(GpuRegister rd, int32_t value);
void LoadConst64(GpuRegister rd, int64_t value); // MIPS64
- void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT);
void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64
- void Bind(Label* label) OVERRIDE;
- void Jump(Label* label) OVERRIDE {
- B(label);
+ void Bind(Label* label) OVERRIDE {
+ Bind(down_cast<Mips64Label*>(label));
+ }
+ void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
+ UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64";
}
- void B(Label* label);
- void Jalr(Label* label, GpuRegister indirect_reg = RA);
- // TODO: implement common for R6 and non-R6 interface for conditional branches?
- void Bltc(GpuRegister rs, GpuRegister rt, Label* label);
- void Bltzc(GpuRegister rt, Label* label);
- void Bgtzc(GpuRegister rt, Label* label);
- void Bgec(GpuRegister rs, GpuRegister rt, Label* label);
- void Bgezc(GpuRegister rt, Label* label);
- void Blezc(GpuRegister rt, Label* label);
- void Bltuc(GpuRegister rs, GpuRegister rt, Label* label);
- void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label);
- void Beqc(GpuRegister rs, GpuRegister rt, Label* label);
- void Bnec(GpuRegister rs, GpuRegister rt, Label* label);
- void Beqzc(GpuRegister rs, Label* label);
- void Bnezc(GpuRegister rs, Label* label);
+
+ void Bind(Mips64Label* label);
+ void Bc(Mips64Label* label);
+ void Jialc(Mips64Label* label, GpuRegister indirect_reg);
+ void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+ void Bltzc(GpuRegister rt, Mips64Label* label);
+ void Bgtzc(GpuRegister rt, Mips64Label* label);
+ void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+ void Bgezc(GpuRegister rt, Mips64Label* label);
+ void Blezc(GpuRegister rt, Mips64Label* label);
+ void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+ void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+ void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+ void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+ void Beqzc(GpuRegister rs, Mips64Label* label);
+ void Bnezc(GpuRegister rs, Mips64Label* label);
void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
@@ -277,43 +328,42 @@ class Mips64Assembler FINAL : public Assembler {
void Emit(uint32_t value);
//
- // Overridden common assembler high-level functionality
+ // Overridden common assembler high-level functionality.
//
- // Emit code that will create an activation on the stack
+ // Emit code that will create an activation on the stack.
void BuildFrame(size_t frame_size, ManagedRegister method_reg,
const std::vector<ManagedRegister>& callee_save_regs,
const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
- // Emit code that will remove an activation from the stack
+ // Emit code that will remove an activation from the stack.
void RemoveFrame(size_t frame_size,
const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE;
void IncreaseFrameSize(size_t adjust) OVERRIDE;
void DecreaseFrameSize(size_t adjust) OVERRIDE;
- // Store routines
+ // Store routines.
void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE;
void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
- void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
- ManagedRegister mscratch) OVERRIDE;
-
- void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
+ void StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs,
ManagedRegister mscratch) OVERRIDE;
- void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE;
+ void StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) OVERRIDE;
void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
ManagedRegister mscratch) OVERRIDE;
- // Load routines
+ // Load routines.
void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
- void LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) OVERRIDE;
+ void LoadFromThread64(ManagedRegister mdest,
+ ThreadOffset<kMipsDoublewordSize> src,
+ size_t size) OVERRIDE;
void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
@@ -322,15 +372,16 @@ class Mips64Assembler FINAL : public Assembler {
void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
- void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) OVERRIDE;
+ void LoadRawPtrFromThread64(ManagedRegister mdest,
+ ThreadOffset<kMipsDoublewordSize> offs) OVERRIDE;
- // Copying routines
+ // Copying routines.
void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
- void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
+ void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMipsDoublewordSize> thr_offs,
ManagedRegister mscratch) OVERRIDE;
- void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
+ void CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs,
ManagedRegister mscratch) OVERRIDE;
void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
@@ -354,13 +405,13 @@ class Mips64Assembler FINAL : public Assembler {
void MemoryBarrier(ManagedRegister) OVERRIDE;
- // Sign extension
+ // Sign extension.
void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
- // Zero extension
+ // Zero extension.
void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
- // Exploit fast access in managed code to Thread::Current()
+ // Exploit fast access in managed code to Thread::Current().
void GetCurrentThread(ManagedRegister tr) OVERRIDE;
void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE;
@@ -376,7 +427,7 @@ class Mips64Assembler FINAL : public Assembler {
void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister
mscratch, bool null_allowed) OVERRIDE;
- // src holds a handle scope entry (Object**) load this into dst
+ // src holds a handle scope entry (Object**); load this into dst.
void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
// Heap::VerifyObject on src. In some cases (such as a reference to this) we
@@ -384,37 +435,253 @@ class Mips64Assembler FINAL : public Assembler {
void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
- // Call to address held at [base+offset]
+ // Call to address held at [base+offset].
void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
- void CallFromThread64(ThreadOffset<8> offset, ManagedRegister mscratch) OVERRIDE;
+ void CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset,
+ ManagedRegister mscratch) OVERRIDE;
// Generate code to check if Thread::Current()->exception_ is non-null
// and branch to a ExceptionSlowPath if it is.
void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE;
+ // Emit slow paths queued during assembly and promote short branches to long if needed.
+ void FinalizeCode() OVERRIDE;
+
+ // Emit branches and finalize all instructions.
+ void FinalizeInstructions(const MemoryRegion& region);
+
+ // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS64,
+ // must be used instead of Mips64Label::GetPosition()).
+ uint32_t GetLabelLocation(Mips64Label* label) const;
+
+ // Get the final position of a label after local fixup based on the old position
+ // recorded before FinalizeCode().
+ uint32_t GetAdjustedPosition(uint32_t old_position);
+
+ enum BranchCondition {
+ kCondLT,
+ kCondGE,
+ kCondLE,
+ kCondGT,
+ kCondLTZ,
+ kCondGEZ,
+ kCondLEZ,
+ kCondGTZ,
+ kCondEQ,
+ kCondNE,
+ kCondEQZ,
+ kCondNEZ,
+ kCondLTU,
+ kCondGEU,
+ kUncond,
+ };
+ friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
+
private:
+ class Branch {
+ public:
+ enum Type {
+ // Short branches.
+ kUncondBranch,
+ kCondBranch,
+ kCall,
+ // Long branches.
+ kLongUncondBranch,
+ kLongCondBranch,
+ kLongCall,
+ };
+
+ // Bit sizes of offsets defined as enums to minimize chance of typos.
+ enum OffsetBits {
+ kOffset16 = 16,
+ kOffset18 = 18,
+ kOffset21 = 21,
+ kOffset23 = 23,
+ kOffset28 = 28,
+ kOffset32 = 32,
+ };
+
+ static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_
+ static constexpr int32_t kMaxBranchLength = 32;
+ static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t);
+
+ struct BranchInfo {
+ // Branch length as a number of 4-byte-long instructions.
+ uint32_t length;
+ // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's
+ // PC-relative offset (or its most significant 16-bit half, which goes first).
+ uint32_t instr_offset;
+ // Different MIPS instructions with PC-relative offsets apply said offsets to slightly
+ // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte
+ // instructions) from the instruction containing the offset.
+ uint32_t pc_org;
+ // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is
+ // an exception: use kOffset23 for beqzc/bnezc).
+ OffsetBits offset_size;
+ // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift
+ // count.
+ int offset_shift;
+ };
+ static const BranchInfo branch_info_[/* Type */];
+
+ // Unconditional branch.
+ Branch(uint32_t location, uint32_t target);
+ // Conditional branch.
+ Branch(uint32_t location,
+ uint32_t target,
+ BranchCondition condition,
+ GpuRegister lhs_reg,
+ GpuRegister rhs_reg = ZERO);
+ // Call (branch and link) that stores the target address in a given register (i.e. T9).
+ Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg);
+
+ // Some conditional branches with lhs = rhs are effectively NOPs, while some
+ // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs.
+ // So, we need a way to identify such branches in order to emit no instructions for them
+ // or change them to unconditional.
+ static bool IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);
+ static bool IsUncond(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);
+
+ static BranchCondition OppositeCondition(BranchCondition cond);
+
+ Type GetType() const;
+ BranchCondition GetCondition() const;
+ GpuRegister GetLeftRegister() const;
+ GpuRegister GetRightRegister() const;
+ uint32_t GetTarget() const;
+ uint32_t GetLocation() const;
+ uint32_t GetOldLocation() const;
+ uint32_t GetLength() const;
+ uint32_t GetOldLength() const;
+ uint32_t GetSize() const;
+ uint32_t GetOldSize() const;
+ uint32_t GetEndLocation() const;
+ uint32_t GetOldEndLocation() const;
+ bool IsLong() const;
+ bool IsResolved() const;
+
+ // Returns the bit size of the signed offset that the branch instruction can handle.
+ OffsetBits GetOffsetSize() const;
+
+ // Calculates the distance between two byte locations in the assembler buffer and
+ // returns the number of bits needed to represent the distance as a signed integer.
+ //
+ // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc),
+ // and 26 (bc) bits, which are additionally shifted left 2 positions at run time.
+ //
+ // Composite branches (made of several instructions) with longer reach have 32-bit
+ // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first).
+ // The composite branches cover the range of PC + ~+/-2GB. The range is not end-to-end,
+ // however. Consider the following implementation of a long unconditional branch, for
+ // example:
+ //
+ // auipc at, offset_31_16 // at = pc + sign_extend(offset_31_16) << 16
+ // jic at, offset_15_0 // pc = at + sign_extend(offset_15_0)
+ //
+ // Both of the above instructions take 16-bit signed offsets as immediate operands.
+ // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000
+ // due to sign extension. This must be compensated for by incrementing offset_31_16
+ // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is
+ // 0x7FFF, adding 1 will overflow the positive offset into the negative range.
+ // Therefore, the long branch range is something like from PC - 0x80000000 to
+ // PC + 0x7FFF7FFF, IOW, shorter by 32KB on one side.
+ //
+ // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special
+ // case with the addiu instruction and a 16-bit offset.
+ static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);
+
+ // Resolve a branch when the target is known.
+ void Resolve(uint32_t target);
+
+ // Relocate a branch by a given delta if needed due to expansion of this or another
+ // branch at a given location by this delta (just changes location_ and target_).
+ void Relocate(uint32_t expand_location, uint32_t delta);
+
+ // If the branch is short, changes its type to long.
+ void PromoteToLong();
+
+ // If necessary, updates the type by promoting a short branch to a long branch
+ // based on the branch location and target. Returns the amount (in bytes) by
+ // which the branch size has increased.
+ // max_short_distance caps the maximum distance between location_ and target_
+ // that is allowed for short branches. This is for debugging/testing purposes.
+ // max_short_distance = 0 forces all short branches to become long.
+ // Use the implicit default argument when not debugging/testing.
+ uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
+
+ // Returns the location of the instruction(s) containing the offset.
+ uint32_t GetOffsetLocation() const;
+
+ // Calculates and returns the offset ready for encoding in the branch instruction(s).
+ uint32_t GetOffset() const;
+
+ private:
+ // Completes branch construction by determining and recording its type.
+ void InitializeType(bool is_call);
+ // Helper for the above.
+ void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
+
+ uint32_t old_location_; // Offset into assembler buffer in bytes.
+ uint32_t location_; // Offset into assembler buffer in bytes.
+ uint32_t target_; // Offset into assembler buffer in bytes.
+
+ GpuRegister lhs_reg_; // Left-hand side register in conditional branches or
+ // indirect call register.
+ GpuRegister rhs_reg_; // Right-hand side register in conditional branches.
+ BranchCondition condition_; // Condition for conditional branches.
+
+ Type type_; // Current type of the branch.
+ Type old_type_; // Initial type of the branch.
+ };
+ friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
+ friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
+
void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct);
void EmitRsd(int opcode, GpuRegister rs, GpuRegister rd, int shamt, int funct);
void EmitRtd(int opcode, GpuRegister rt, GpuRegister rd, int shamt, int funct);
void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm);
void EmitI21(int opcode, GpuRegister rs, uint32_t imm21);
- void EmitJ(int opcode, uint32_t addr26);
+ void EmitI26(int opcode, uint32_t imm26);
void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct);
void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm);
+ void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21);
- DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
-};
+ void Buncond(Mips64Label* label);
+ void Bcond(Mips64Label* label,
+ BranchCondition condition,
+ GpuRegister lhs,
+ GpuRegister rhs = ZERO);
+ void Call(Mips64Label* label, GpuRegister indirect_reg);
+ void FinalizeLabeledBranch(Mips64Label* label);
-// Slowpath entered when Thread::Current()->_exception is non-null
-class Mips64ExceptionSlowPath FINAL : public SlowPath {
- public:
- Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust)
- : scratch_(scratch), stack_adjust_(stack_adjust) {}
- virtual void Emit(Assembler *sp_asm) OVERRIDE;
- private:
- const Mips64ManagedRegister scratch_;
- const size_t stack_adjust_;
+ Branch* GetBranch(uint32_t branch_id);
+ const Branch* GetBranch(uint32_t branch_id) const;
+
+ void PromoteBranches();
+ void EmitBranch(Branch* branch);
+ void EmitBranches();
+ void PatchCFI();
+
+ // Emits exception block.
+ void EmitExceptionPoll(Mips64ExceptionSlowPath* exception);
+
+ // List of exception blocks to generate at the end of the code cache.
+ std::vector<Mips64ExceptionSlowPath> exception_blocks_;
+
+ std::vector<Branch> branches_;
+
+ // Whether appending instructions at the end of the buffer or overwriting the existing ones.
+ bool overwriting_;
+ // The current overwrite location.
+ uint32_t overwrite_location_;
+
+ // Data for GetAdjustedPosition(), see the description there.
+ uint32_t last_position_adjustment_;
+ uint32_t last_old_position_;
+ uint32_t last_branch_id_;
+
+ DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
};
} // namespace mips64
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 4413906fd7..29a5a88316 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -24,6 +24,8 @@
#include "base/stl_util.h"
#include "utils/assembler_test.h"
+#define __ GetAssembler()->
+
namespace art {
struct MIPS64CpuRegisterCompare {
@@ -48,8 +50,26 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
return "mips64";
}
+ std::string GetAssemblerCmdName() OVERRIDE {
+ // We assemble and link for MIPS64R6. See GetAssemblerParameters() for details.
+ return "gcc";
+ }
+
std::string GetAssemblerParameters() OVERRIDE {
- return " --no-warn -march=mips64r6";
+ // We assemble and link for MIPS64R6. The reason is that object files produced for MIPS64R6
+ // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative
+ // branches in the .text section and so they require a relocation pass (there's a relocation
+ // section, .rela.text, that has the needed info to fix up the branches).
+ return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib";
+ }
+
+ void Pad(std::vector<uint8_t>& data) OVERRIDE {
+ // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple
+ // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't
+ // pad, so, in order for two assembler outputs to match, we need to match the padding as well.
+ // NOP is encoded as four zero bytes on MIPS.
+ size_t pad_size = RoundUp(data.size(), 16u) - data.size();
+ data.insert(data.end(), pad_size, 0);
}
std::string GetDisassembleParameters() OVERRIDE {
@@ -182,6 +202,71 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
return secondary_register_names_[reg];
}
+ std::string RepeatInsn(size_t count, const std::string& insn) {
+ std::string result;
+ for (; count != 0u; --count) {
+ result += insn;
+ }
+ return result;
+ }
+
+ void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
+ mips64::Mips64Label*),
+ std::string instr_name) {
+ mips64::Mips64Label label;
+ (Base::GetAssembler()->*f)(mips64::A0, &label);
+ constexpr size_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label);
+ constexpr size_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ (Base::GetAssembler()->*f)(mips64::A1, &label);
+
+ std::string expected =
+ ".set noreorder\n" +
+ instr_name + " $a0, 1f\n"
+ "nop\n" +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n" +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ instr_name + " $a1, 1b\n"
+ "nop\n";
+ DriverStr(expected, instr_name);
+ }
+
+ void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
+ mips64::GpuRegister,
+ mips64::Mips64Label*),
+ std::string instr_name) {
+ mips64::Mips64Label label;
+ (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label);
+ constexpr size_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label);
+ constexpr size_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label);
+
+ std::string expected =
+ ".set noreorder\n" +
+ instr_name + " $a0, $a1, 1f\n"
+ "nop\n" +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n" +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ instr_name + " $a2, $a3, 1b\n"
+ "nop\n";
+ DriverStr(expected, instr_name);
+ }
+
private:
std::vector<mips64::GpuRegister*> registers_;
std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_;
@@ -194,7 +279,6 @@ TEST_F(AssemblerMIPS64Test, Toolchain) {
EXPECT_TRUE(CheckTools());
}
-
///////////////////
// FP Operations //
///////////////////
@@ -348,7 +432,203 @@ TEST_F(AssemblerMIPS64Test, CvtSW) {
////////////////
TEST_F(AssemblerMIPS64Test, Jalr) {
- DriverStr(RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr");
+ DriverStr(".set noreorder\n" +
+ RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr");
+}
+
+TEST_F(AssemblerMIPS64Test, Jialc) {
+ mips64::Mips64Label label1, label2;
+ __ Jialc(&label1, mips64::T9);
+ constexpr size_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label1);
+ __ Jialc(&label2, mips64::T9);
+ constexpr size_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label2);
+ __ Jialc(&label1, mips64::T9);
+
+ std::string expected =
+ ".set noreorder\n"
+ "lapc $t9, 1f\n"
+ "jialc $t9, 0\n" +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n"
+ "lapc $t9, 2f\n"
+ "jialc $t9, 0\n" +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ "2:\n"
+ "lapc $t9, 1b\n"
+ "jialc $t9, 0\n";
+ DriverStr(expected, "Jialc");
+}
+
+TEST_F(AssemblerMIPS64Test, LongJialc) {
+ mips64::Mips64Label label1, label2;
+ __ Jialc(&label1, mips64::T9);
+ constexpr uint32_t kAdduCount1 = (1u << 18) + 1;
+ for (uint32_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label1);
+ __ Jialc(&label2, mips64::T9);
+ constexpr uint32_t kAdduCount2 = (1u << 18) + 1;
+ for (uint32_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label2);
+ __ Jialc(&label1, mips64::T9);
+
+ uint32_t offset_forward1 = 3 + kAdduCount1; // 3: account for auipc, daddiu and jialc.
+ offset_forward1 <<= 2;
+ offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in daddiu.
+
+ uint32_t offset_forward2 = 3 + kAdduCount2; // 3: account for auipc, daddiu and jialc.
+ offset_forward2 <<= 2;
+ offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in daddiu.
+
+ uint32_t offset_back = -(3 + kAdduCount2); // 3: account for auipc, daddiu and jialc.
+ offset_back <<= 2;
+ offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in daddiu.
+
+ std::ostringstream oss;
+ oss <<
+ ".set noreorder\n"
+ "auipc $t9, 0x" << std::hex << High16Bits(offset_forward1) << "\n"
+ "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward1) << "\n"
+ "jialc $t9, 0\n" <<
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+ "1:\n"
+ "auipc $t9, 0x" << std::hex << High16Bits(offset_forward2) << "\n"
+ "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward2) << "\n"
+ "jialc $t9, 0\n" <<
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+ "2:\n"
+ "auipc $t9, 0x" << std::hex << High16Bits(offset_back) << "\n"
+ "daddiu $t9, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+ "jialc $t9, 0\n";
+ std::string expected = oss.str();
+ DriverStr(expected, "LongJialc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bc) {
+ mips64::Mips64Label label1, label2;
+ __ Bc(&label1);
+ constexpr size_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label1);
+ __ Bc(&label2);
+ constexpr size_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label2);
+ __ Bc(&label1);
+
+ std::string expected =
+ ".set noreorder\n"
+ "bc 1f\n" +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n"
+ "bc 2f\n" +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ "2:\n"
+ "bc 1b\n";
+ DriverStr(expected, "Bc");
+}
+
+TEST_F(AssemblerMIPS64Test, Beqzc) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bnezc) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltzc) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgezc) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Blezc) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgtzc) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Beqc) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bnec) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltc) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgec) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltuc) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgeuc) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc");
+}
+
+TEST_F(AssemblerMIPS64Test, LongBeqc) {
+ mips64::Mips64Label label;
+ __ Beqc(mips64::A0, mips64::A1, &label);
+ constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
+ for (uint32_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label);
+ constexpr uint32_t kAdduCount2 = (1u << 15) + 1;
+ for (uint32_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Beqc(mips64::A2, mips64::A3, &label);
+
+ uint32_t offset_forward = 2 + kAdduCount1; // 2: account for auipc and jic.
+ offset_forward <<= 2;
+ offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic.
+
+ uint32_t offset_back = -(kAdduCount2 + 1); // 1: account for bnec.
+ offset_back <<= 2;
+ offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic.
+
+ std::ostringstream oss;
+ oss <<
+ ".set noreorder\n"
+ "bnec $a0, $a1, 1f\n"
+ "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+ "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+ "1:\n" <<
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+ "2:\n" <<
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+ "bnec $a2, $a3, 3f\n"
+ "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+ "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+ "3:\n";
+ std::string expected = oss.str();
+ DriverStr(expected, "LongBeqc");
}
//////////