diff options
author | 2015-11-19 21:13:52 +0000 | |
---|---|---|
committer | 2015-11-20 11:55:49 +0000 | |
commit | 6fd0ffe8da212723a3ac0256ce350b5872cc61d4 (patch) | |
tree | 122c89d874460662d3feba523cb9f2553fb78bd3 | |
parent | beb709a2607a00b5df33f0235f22ccdd876cee22 (diff) |
Optimizing/Thumb2: Improve load/store for large offsets.
This reduces the boot.oat size on Nexus 5 by 568KiB (0.8%).
Also change 32-bit ADD/SUB immediate to use the recommended
encoding T3 when both T3 and T4 are available.
Change-Id: I174382bda2b22da70560b947f5536acf8c1814a9
-rw-r--r-- | compiler/utils/arm/assembler_arm.cc | 8 | ||||
-rw-r--r-- | compiler/utils/arm/assembler_arm32.h | 2 | ||||
-rw-r--r-- | compiler/utils/arm/assembler_thumb2.cc | 127 | ||||
-rw-r--r-- | compiler/utils/arm/assembler_thumb2.h | 14 | ||||
-rw-r--r-- | compiler/utils/arm/assembler_thumb2_test.cc | 40 | ||||
-rw-r--r-- | compiler/utils/assembler_thumb_test.cc | 71 | ||||
-rw-r--r-- | compiler/utils/assembler_thumb_test_expected.cc.inc | 123 |
7 files changed, 302 insertions, 83 deletions
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index 68e39568bb..dead8fd9a8 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -342,9 +342,9 @@ bool Address::CanHoldLoadOffsetThumb(LoadOperandType type, int offset) { return IsAbsoluteUint<12>(offset); case kLoadSWord: case kLoadDWord: - return IsAbsoluteUint<10>(offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; // VFP addressing mode. case kLoadWordPair: - return IsAbsoluteUint<10>(offset); + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -360,9 +360,9 @@ bool Address::CanHoldStoreOffsetThumb(StoreOperandType type, int offset) { return IsAbsoluteUint<12>(offset); case kStoreSWord: case kStoreDWord: - return IsAbsoluteUint<10>(offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; // VFP addressing mode. case kStoreWordPair: - return IsAbsoluteUint<10>(offset); + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 5233dcbbb0..ce3a87275d 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -389,8 +389,6 @@ class Arm32Assembler FINAL : public ArmAssembler { void EmitBranch(Condition cond, Label* label, bool link); static int32_t EncodeBranchOffset(int offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); - int32_t EncodeTstOffset(int offset, int32_t inst); - int DecodeTstOffset(int32_t inst); bool ShifterOperandCanHoldArm32(uint32_t immediate, ShifterOperand* shifter_op); }; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 297cc54e29..5f82439369 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -1349,7 +1349,8 @@ void Thumb2Assembler::Emit32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED, int32_t encoding = 0; if (so.IsImmediate()) { // Check special cases. - if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12))) { + if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12)) && + /* Prefer T3 encoding to T4. */ !ShifterOperandCanAlwaysHold(so.GetImmediate())) { if (set_cc != kCcSet) { if (opcode == SUB) { thumb_opcode = 5U; @@ -3469,6 +3470,73 @@ void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) } } +int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) { + switch (type) { + case kLoadSignedByte: + case kLoadSignedHalfword: + case kLoadUnsignedHalfword: + case kLoadUnsignedByte: + case kLoadWord: + // We can encode imm12 offset. + return 0xfffu; + case kLoadSWord: + case kLoadDWord: + case kLoadWordPair: + // We can encode imm8:'00' offset. + return 0xff << 2; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +int32_t Thumb2Assembler::GetAllowedStoreOffsetBits(StoreOperandType type) { + switch (type) { + case kStoreHalfword: + case kStoreByte: + case kStoreWord: + // We can encode imm12 offset. + return 0xfff; + case kStoreSWord: + case kStoreDWord: + case kStoreWordPair: + // We can encode imm8:'00' offset. + return 0xff << 2; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +bool Thumb2Assembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits, + int32_t offset, + /*out*/ int32_t* add_to_base, + /*out*/ int32_t* offset_for_load_store) { + int32_t other_bits = offset & ~allowed_offset_bits; + if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) { + *add_to_base = offset & ~allowed_offset_bits; + *offset_for_load_store = offset & allowed_offset_bits; + return true; + } + return false; +} + +int32_t Thumb2Assembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits, + Register temp, + Register base, + int32_t offset, + Condition cond) { + DCHECK_NE(offset & ~allowed_offset_bits, 0); + int32_t add_to_base, offset_for_load; + if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) { + AddConstant(temp, base, add_to_base, cond, kCcKeep); + return offset_for_load; + } else { + LoadImmediate(temp, offset, cond); + add(temp, temp, ShifterOperand(base), cond, kCcKeep); + return 0; + } +} // Implementation note: this method must emit at most one instruction when // Address::CanHoldLoadOffsetThumb. @@ -3479,12 +3547,26 @@ void Thumb2Assembler::LoadFromOffset(LoadOperandType type, Condition cond) { if (!Address::CanHoldLoadOffsetThumb(type, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); - base = IP; - offset = 0; + // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks. + int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type); + DCHECK_NE(offset & ~allowed_offset_bits, 0); + int32_t add_to_base, offset_for_load; + if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) { + // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load. + AddConstant(reg, base, add_to_base, cond, kCcKeep); + base = reg; + offset = offset_for_load; + } else { + Register temp = (reg == base) ? IP : reg; + LoadImmediate(temp, offset, cond); + // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD. + // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load. + add(reg, reg, ShifterOperand((reg == base) ? IP : base), cond, kCcKeep); + base = reg; + offset = 0; + } } - CHECK(Address::CanHoldLoadOffsetThumb(type, offset)); + DCHECK(Address::CanHoldLoadOffsetThumb(type, offset)); switch (type) { case kLoadSignedByte: ldrsb(reg, Address(base, offset), cond); @@ -3510,7 +3592,6 @@ void Thumb2Assembler::LoadFromOffset(LoadOperandType type, } } - // Implementation note: this method must emit at most one instruction when // Address::CanHoldLoadOffsetThumb, as expected by JIT::GuardedLoadFromOffset. void Thumb2Assembler::LoadSFromOffset(SRegister reg, @@ -3519,12 +3600,10 @@ void Thumb2Assembler::LoadSFromOffset(SRegister reg, Condition cond) { if (!Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadSWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)); + DCHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)); vldrs(reg, Address(base, offset), cond); } @@ -3537,12 +3616,10 @@ void Thumb2Assembler::LoadDFromOffset(DRegister reg, Condition cond) { if (!Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadDWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)); + DCHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)); vldrd(reg, Address(base, offset), cond); } @@ -3573,12 +3650,12 @@ void Thumb2Assembler::StoreToOffset(StoreOperandType type, offset += kRegisterSize; } } - LoadImmediate(tmp_reg, offset, cond); - add(tmp_reg, tmp_reg, ShifterOperand(base), AL); + // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset() + // and in the "unsplittable" path get rid of the "add" by using the store indexed instead. + offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset, cond); base = tmp_reg; - offset = 0; } - CHECK(Address::CanHoldStoreOffsetThumb(type, offset)); + DCHECK(Address::CanHoldStoreOffsetThumb(type, offset)); switch (type) { case kStoreByte: strb(reg, Address(base, offset), cond); @@ -3611,12 +3688,10 @@ void Thumb2Assembler::StoreSToOffset(SRegister reg, Condition cond) { if (!Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreSWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)); + DCHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)); vstrs(reg, Address(base, offset), cond); } @@ -3629,12 +3704,10 @@ void Thumb2Assembler::StoreDToOffset(DRegister reg, Condition cond) { if (!Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreDWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)); + DCHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)); vstrd(reg, Address(base, offset), cond); } diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index e18361300a..9aeece8e57 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -729,13 +729,23 @@ class Thumb2Assembler FINAL : public ArmAssembler { void EmitBranch(Condition cond, Label* label, bool link, bool x); static int32_t EncodeBranchOffset(int32_t offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); - int32_t EncodeTstOffset(int offset, int32_t inst); - int DecodeTstOffset(int32_t inst); void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount, Condition cond = AL, SetCc set_cc = kCcDontCare); void EmitShift(Register rd, Register rn, Shift shift, Register rm, Condition cond = AL, SetCc set_cc = kCcDontCare); + static int32_t GetAllowedLoadOffsetBits(LoadOperandType type); + static int32_t GetAllowedStoreOffsetBits(StoreOperandType type); + bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits, + int32_t offset, + /*out*/ int32_t* add_to_base, + /*out*/ int32_t* offset_for_load_store); + int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits, + Register temp, + Register base, + int32_t offset, + Condition cond); + // Whether the assembler can relocate branches. If false, unresolved branches will be // emitted on 32bits. bool can_relocate_branches_; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index cb4b20b5ba..7b32b0fd26 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -243,7 +243,7 @@ TEST_F(AssemblerThumb2Test, sub) { const char* expected = "subs r1, r0, #42\n" - "subw r1, r0, #42\n" + "sub.w r1, r0, #42\n" "subs r1, r0, r2, asr #31\n" "sub r1, r0, r2, asr #31\n"; DriverStr(expected, "sub"); @@ -257,7 +257,7 @@ TEST_F(AssemblerThumb2Test, add) { const char* expected = "adds r1, r0, #42\n" - "addw r1, r0, #42\n" + "add.w r1, r0, #42\n" "adds r1, r0, r2, asr #31\n" "add r1, r0, r2, asr #31\n"; DriverStr(expected, "add"); @@ -305,21 +305,18 @@ TEST_F(AssemblerThumb2Test, StoreWordToNonThumbOffset) { __ StoreToOffset(type, arm::IP, arm::R5, offset); const char* expected = - "mov ip, #4096\n" // LoadImmediate(ip, 4096) - "add ip, ip, sp\n" + "add.w ip, sp, #4096\n" // AddConstant(ip, sp, 4096) "str r0, [ip, #0]\n" - "str r5, [sp, #-4]!\n" // Push(r5) - "movw r5, #4100\n" // LoadImmediate(r5, 4096 + kRegisterSize) - "add r5, r5, sp\n" - "str ip, [r5, #0]\n" - "ldr r5, [sp], #4\n" // Pop(r5) - - "str r6, [sp, #-4]!\n" // Push(r6) - "mov r6, #4096\n" // LoadImmediate(r6, 4096) - "add r6, r6, r5\n" - "str ip, [r6, #0]\n" - "ldr r6, [sp], #4\n"; // Pop(r6) + "str r5, [sp, #-4]!\n" // Push(r5) + "add.w r5, sp, #4096\n" // AddConstant(r5, 4100 & ~0xfff) + "str ip, [r5, #4]\n" // StoreToOffset(type, ip, r5, 4100 & 0xfff) + "ldr r5, [sp], #4\n" // Pop(r5) + + "str r6, [sp, #-4]!\n" // Push(r6) + "add.w r6, r5, #4096\n" // AddConstant(r6, r5, 4096 & ~0xfff) + "str ip, [r6, #0]\n" // StoreToOffset(type, ip, r6, 4096 & 0xfff) + "ldr r6, [sp], #4\n"; // Pop(r6) DriverStr(expected, "StoreWordToNonThumbOffset"); } @@ -360,20 +357,17 @@ TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) { __ StoreToOffset(type, arm::R11, arm::R5, offset); const char* expected = - "mov ip, #1024\n" // LoadImmediate(ip, 1024) - "add ip, ip, sp\n" + "add.w ip, sp, #1024\n" // AddConstant(ip, sp, 1024) "strd r0, r1, [ip, #0]\n" "str r5, [sp, #-4]!\n" // Push(r5) - "movw r5, #1028\n" // LoadImmediate(r5, 1024 + kRegisterSize) - "add r5, r5, sp\n" - "strd r11, ip, [r5, #0]\n" + "add.w r5, sp, #1024\n" // AddConstant(r5, sp, (1024 + kRegisterSize) & ~0x3fc) + "strd r11, ip, [r5, #4]\n" // StoreToOffset(type, r11, sp, (1024 + kRegisterSize) & 0x3fc) "ldr r5, [sp], #4\n" // Pop(r5) "str r6, [sp, #-4]!\n" // Push(r6) - "mov r6, #1024\n" // LoadImmediate(r6, 1024) - "add r6, r6, r5\n" - "strd r11, ip, [r6, #0]\n" + "add.w r6, r5, #1024\n" // AddConstant(r6, r5, 1024 & ~0x3fc) + "strd r11, ip, [r6, #0]\n" // StoreToOffset(type, r11, r6, 1024 & 0x3fc) "ldr r6, [sp], #4\n"; // Pop(r6) DriverStr(expected, "StoreWordPairToNonThumbOffset"); } diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 2ae88413e7..ef11282084 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -823,29 +823,80 @@ TEST(Thumb2AssemblerTest, SpecialAddSub) { __ add(R2, SP, ShifterOperand(0xf00)); // 32 bit due to imm size. __ add(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size. + __ add(SP, SP, ShifterOperand(0xffc)); // 32 bit due to imm size; encoding T4. - __ sub(SP, SP, ShifterOperand(0x50)); // 16 bit - __ sub(R0, SP, ShifterOperand(0x50)); // 32 bit - __ sub(R8, SP, ShifterOperand(0x50)); // 32 bit. + __ sub(SP, SP, ShifterOperand(0x50)); // 16 bit + __ sub(R0, SP, ShifterOperand(0x50)); // 32 bit + __ sub(R8, SP, ShifterOperand(0x50)); // 32 bit. - __ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size + __ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size + __ sub(SP, SP, ShifterOperand(0xffc)); // 32 bit due to imm size; encoding T4. EmitAndCheck(&assembler, "SpecialAddSub"); } +TEST(Thumb2AssemblerTest, LoadFromOffset) { + arm::Thumb2Assembler assembler; + + __ LoadFromOffset(kLoadWord, R2, R4, 12); + __ LoadFromOffset(kLoadWord, R2, R4, 0xfff); + __ LoadFromOffset(kLoadWord, R2, R4, 0x1000); + __ LoadFromOffset(kLoadWord, R2, R4, 0x1000a4); + __ LoadFromOffset(kLoadWord, R2, R4, 0x101000); + __ LoadFromOffset(kLoadWord, R4, R4, 0x101000); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 12); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0xfff); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000a4); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x101000); + __ LoadFromOffset(kLoadUnsignedHalfword, R4, R4, 0x101000); + __ LoadFromOffset(kLoadWordPair, R2, R4, 12); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x3fc); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400a4); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x40400); + __ LoadFromOffset(kLoadWordPair, R4, R4, 0x40400); + + __ LoadFromOffset(kLoadWord, R0, R12, 12); // 32-bit because of R12. + __ LoadFromOffset(kLoadWord, R2, R4, 0xa4 - 0x100000); + + __ LoadFromOffset(kLoadSignedByte, R2, R4, 12); + __ LoadFromOffset(kLoadUnsignedByte, R2, R4, 12); + __ LoadFromOffset(kLoadSignedHalfword, R2, R4, 12); + + EmitAndCheck(&assembler, "LoadFromOffset"); +} + TEST(Thumb2AssemblerTest, StoreToOffset) { arm::Thumb2Assembler assembler; - __ StoreToOffset(kStoreWord, R2, R4, 12); // Simple - __ StoreToOffset(kStoreWord, R2, R4, 0x2000); // Offset too big. - __ StoreToOffset(kStoreWord, R0, R12, 12); - __ StoreToOffset(kStoreHalfword, R0, R12, 12); - __ StoreToOffset(kStoreByte, R2, R12, 12); + __ StoreToOffset(kStoreWord, R2, R4, 12); + __ StoreToOffset(kStoreWord, R2, R4, 0xfff); + __ StoreToOffset(kStoreWord, R2, R4, 0x1000); + __ StoreToOffset(kStoreWord, R2, R4, 0x1000a4); + __ StoreToOffset(kStoreWord, R2, R4, 0x101000); + __ StoreToOffset(kStoreWord, R4, R4, 0x101000); + __ StoreToOffset(kStoreHalfword, R2, R4, 12); + __ StoreToOffset(kStoreHalfword, R2, R4, 0xfff); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000a4); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x101000); + __ StoreToOffset(kStoreHalfword, R4, R4, 0x101000); + __ StoreToOffset(kStoreWordPair, R2, R4, 12); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x3fc); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x400); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x400a4); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x40400); + __ StoreToOffset(kStoreWordPair, R4, R4, 0x40400); + + __ StoreToOffset(kStoreWord, R0, R12, 12); // 32-bit because of R12. + __ StoreToOffset(kStoreWord, R2, R4, 0xa4 - 0x100000); + + __ StoreToOffset(kStoreByte, R2, R4, 12); EmitAndCheck(&assembler, "StoreToOffset"); } - TEST(Thumb2AssemblerTest, IfThen) { arm::Thumb2Assembler assembler; diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index b79c2e46f0..6c0ff1e89d 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -132,8 +132,8 @@ const char* DataProcessingRegisterResults[] = { const char* DataProcessingImmediateResults[] = { " 0: 2055 movs r0, #85 ; 0x55\n", " 2: f06f 0055 mvn.w r0, #85 ; 0x55\n", - " 6: f201 0055 addw r0, r1, #85 ; 0x55\n", - " a: f2a1 0055 subw r0, r1, #85 ; 0x55\n", + " 6: f101 0055 add.w r0, r1, #85 ; 0x55\n", + " a: f1a1 0055 sub.w r0, r1, #85 ; 0x55\n", " e: f001 0055 and.w r0, r1, #85 ; 0x55\n", " 12: f041 0055 orr.w r0, r1, #85 ; 0x55\n", " 16: f061 0055 orn r0, r1, #85 ; 0x55\n", @@ -434,23 +434,115 @@ const char* MovWMovTResults[] = { const char* SpecialAddSubResults[] = { " 0: aa14 add r2, sp, #80 ; 0x50\n", " 2: b014 add sp, #80 ; 0x50\n", - " 4: f20d 0850 addw r8, sp, #80 ; 0x50\n", - " 8: f60d 7200 addw r2, sp, #3840 ; 0xf00\n", - " c: f60d 7d00 addw sp, sp, #3840 ; 0xf00\n", - " 10: b094 sub sp, #80 ; 0x50\n", - " 12: f2ad 0050 subw r0, sp, #80 ; 0x50\n", - " 16: f2ad 0850 subw r8, sp, #80 ; 0x50\n", - " 1a: f6ad 7d00 subw sp, sp, #3840 ; 0xf00\n", + " 4: f10d 0850 add.w r8, sp, #80 ; 0x50\n", + " 8: f50d 6270 add.w r2, sp, #3840 ; 0xf00\n", + " c: f50d 6d70 add.w sp, sp, #3840 ; 0xf00\n", + " 10: f60d 7dfc addw sp, sp, #4092 ; 0xffc\n", + " 14: b094 sub sp, #80 ; 0x50\n", + " 16: f1ad 0050 sub.w r0, sp, #80 ; 0x50\n", + " 1a: f1ad 0850 sub.w r8, sp, #80 ; 0x50\n", + " 1e: f5ad 6d70 sub.w sp, sp, #3840 ; 0xf00\n", + " 22: f6ad 7dfc subw sp, sp, #4092 ; 0xffc\n", + nullptr +}; +const char* LoadFromOffsetResults[] = { + " 0: 68e2 ldr r2, [r4, #12]\n", + " 2: f8d4 2fff ldr.w r2, [r4, #4095] ; 0xfff\n", + " 6: f504 5280 add.w r2, r4, #4096 ; 0x1000\n", + " a: 6812 ldr r2, [r2, #0]\n", + " c: f504 1280 add.w r2, r4, #1048576 ; 0x100000\n", + " 10: f8d2 20a4 ldr.w r2, [r2, #164] ; 0xa4\n", + " 14: f241 0200 movw r2, #4096 ; 0x1000\n", + " 18: f2c0 0210 movt r2, #16\n", + " 1c: 4422 add r2, r4\n", + " 1e: 6812 ldr r2, [r2, #0]\n", + " 20: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 24: f2c0 0c10 movt ip, #16\n", + " 28: 4464 add r4, ip\n", + " 2a: 6824 ldr r4, [r4, #0]\n", + " 2c: 89a2 ldrh r2, [r4, #12]\n", + " 2e: f8b4 2fff ldrh.w r2, [r4, #4095] ; 0xfff\n", + " 32: f504 5280 add.w r2, r4, #4096 ; 0x1000\n", + " 36: 8812 ldrh r2, [r2, #0]\n", + " 38: f504 1280 add.w r2, r4, #1048576 ; 0x100000\n", + " 3c: f8b2 20a4 ldrh.w r2, [r2, #164] ; 0xa4\n", + " 40: f241 0200 movw r2, #4096 ; 0x1000\n", + " 44: f2c0 0210 movt r2, #16\n", + " 48: 4422 add r2, r4\n", + " 4a: 8812 ldrh r2, [r2, #0]\n", + " 4c: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 50: f2c0 0c10 movt ip, #16\n", + " 54: 4464 add r4, ip\n", + " 56: 8824 ldrh r4, [r4, #0]\n", + " 58: e9d4 2303 ldrd r2, r3, [r4, #12]\n", + " 5c: e9d4 23ff ldrd r2, r3, [r4, #1020] ; 0x3fc\n", + " 60: f504 6280 add.w r2, r4, #1024 ; 0x400\n", + " 64: e9d2 2300 ldrd r2, r3, [r2]\n", + " 68: f504 2280 add.w r2, r4, #262144 ; 0x40000\n", + " 6c: e9d2 2329 ldrd r2, r3, [r2, #164]; 0xa4\n", + " 70: f240 4200 movw r2, #1024 ; 0x400\n", + " 74: f2c0 0204 movt r2, #4\n", + " 78: 4422 add r2, r4\n", + " 7a: e9d2 2300 ldrd r2, r3, [r2]\n", + " 7e: f240 4c00 movw ip, #1024 ; 0x400\n", + " 82: f2c0 0c04 movt ip, #4\n", + " 86: 4464 add r4, ip\n", + " 88: e9d4 4500 ldrd r4, r5, [r4]\n", + " 8c: f8dc 000c ldr.w r0, [ip, #12]\n", + " 90: f5a4 1280 sub.w r2, r4, #1048576 ; 0x100000\n", + " 94: f8d2 20a4 ldr.w r2, [r2, #164] ; 0xa4\n", + " 98: f994 200c ldrsb.w r2, [r4, #12]\n", + " 9c: 7b22 ldrb r2, [r4, #12]\n", + " 9e: f9b4 200c ldrsh.w r2, [r4, #12]\n", nullptr }; const char* StoreToOffsetResults[] = { " 0: 60e2 str r2, [r4, #12]\n", - " 2: f44f 5c00 mov.w ip, #8192 ; 0x2000\n", - " 6: 44a4 add ip, r4\n", - " 8: f8cc 2000 str.w r2, [ip]\n", - " c: f8cc 000c str.w r0, [ip, #12]\n", - " 10: f8ac 000c strh.w r0, [ip, #12]\n", - " 14: f88c 200c strb.w r2, [ip, #12]\n", + " 2: f8c4 2fff str.w r2, [r4, #4095] ; 0xfff\n", + " 6: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n", + " a: f8cc 2000 str.w r2, [ip]\n", + " e: f504 1c80 add.w ip, r4, #1048576 ; 0x100000\n", + " 12: f8cc 20a4 str.w r2, [ip, #164] ; 0xa4\n", + " 16: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 1a: f2c0 0c10 movt ip, #16\n", + " 1e: 44a4 add ip, r4\n", + " 20: f8cc 2000 str.w r2, [ip]\n", + " 24: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 28: f2c0 0c10 movt ip, #16\n", + " 2c: 44a4 add ip, r4\n", + " 2e: f8cc 4000 str.w r4, [ip]\n", + " 32: 81a2 strh r2, [r4, #12]\n", + " 34: f8a4 2fff strh.w r2, [r4, #4095] ; 0xfff\n", + " 38: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n", + " 3c: f8ac 2000 strh.w r2, [ip]\n", + " 40: f504 1c80 add.w ip, r4, #1048576 ; 0x100000\n", + " 44: f8ac 20a4 strh.w r2, [ip, #164] ; 0xa4\n", + " 48: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 4c: f2c0 0c10 movt ip, #16\n", + " 50: 44a4 add ip, r4\n", + " 52: f8ac 2000 strh.w r2, [ip]\n", + " 56: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 5a: f2c0 0c10 movt ip, #16\n", + " 5e: 44a4 add ip, r4\n", + " 60: f8ac 4000 strh.w r4, [ip]\n", + " 64: e9c4 2303 strd r2, r3, [r4, #12]\n", + " 68: e9c4 23ff strd r2, r3, [r4, #1020] ; 0x3fc\n", + " 6c: f504 6c80 add.w ip, r4, #1024 ; 0x400\n", + " 70: e9cc 2300 strd r2, r3, [ip]\n", + " 74: f504 2c80 add.w ip, r4, #262144 ; 0x40000\n", + " 78: e9cc 2329 strd r2, r3, [ip, #164]; 0xa4\n", + " 7c: f240 4c00 movw ip, #1024 ; 0x400\n", + " 80: f2c0 0c04 movt ip, #4\n", + " 84: 44a4 add ip, r4\n", + " 86: e9cc 2300 strd r2, r3, [ip]\n", + " 8a: f240 4c00 movw ip, #1024 ; 0x400\n", + " 8e: f2c0 0c04 movt ip, #4\n", + " 92: 44a4 add ip, r4\n", + " 94: e9cc 4500 strd r4, r5, [ip]\n", + " 98: f8cc 000c str.w r0, [ip, #12]\n", + " 9c: f5a4 1c80 sub.w ip, r4, #1048576 ; 0x100000\n", + " a0: f8cc 20a4 str.w r2, [ip, #164] ; 0xa4\n", + " a4: 7322 strb r2, [r4, #12]\n", nullptr }; const char* IfThenResults[] = { @@ -4966,6 +5058,7 @@ void setup_results() { test_results["StoreMultiple"] = StoreMultipleResults; test_results["MovWMovT"] = MovWMovTResults; test_results["SpecialAddSub"] = SpecialAddSubResults; + test_results["LoadFromOffset"] = LoadFromOffsetResults; test_results["StoreToOffset"] = StoreToOffsetResults; test_results["IfThen"] = IfThenResults; test_results["CbzCbnz"] = CbzCbnzResults; |