Thumb2: Clean up 16-bit LDR/STR detection.
The 16-bit LDR/STR Rt, [sp, #N] encoding can be used for any
N = (imm8 << 2), i.e. word-aligned offsets from 0 to 1020,
but it was previously used only for N < 128. This cleans up
the 16-bit instruction availability calculation for this
case as well as for byte and half-word loads and stores.
However, only the LDR/STR sp, #N change affects the code
that Optimizing currently generates.
Reduces aosp_hammerhead-userdebug boot.oat by 108KiB.
Test: Run ART test suite on Nexus 5.
Change-Id: Ie06d130a48efad10c23f8ff2596a51efd98d0761
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc
index 16b4386..da72c75 100644
--- a/compiler/jni/jni_cfi_test_expected.inc
+++ b/compiler/jni/jni_cfi_test_expected.inc
@@ -1,8 +1,7 @@
static constexpr uint8_t expected_asm_kThumb2[] = {
0x2D, 0xE9, 0xE0, 0x4D, 0x2D, 0xED, 0x10, 0x8A, 0x89, 0xB0, 0x00, 0x90,
- 0xCD, 0xF8, 0x84, 0x10, 0x8D, 0xED, 0x22, 0x0A, 0xCD, 0xF8, 0x8C, 0x20,
- 0xCD, 0xF8, 0x90, 0x30, 0x88, 0xB0, 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC,
- 0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D,
+ 0x21, 0x91, 0x8D, 0xED, 0x22, 0x0A, 0x23, 0x92, 0x24, 0x93, 0x88, 0xB0,
+ 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC, 0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D,
};
static constexpr uint8_t expected_cfi_kThumb2[] = {
0x44, 0x0E, 0x1C, 0x85, 0x07, 0x86, 0x06, 0x87, 0x05, 0x88, 0x04, 0x8A,
@@ -11,7 +10,7 @@
0x55, 0x12, 0x05, 0x56, 0x11, 0x05, 0x57, 0x10, 0x05, 0x58, 0x0F, 0x05,
0x59, 0x0E, 0x05, 0x5A, 0x0D, 0x05, 0x5B, 0x0C, 0x05, 0x5C, 0x0B, 0x05,
0x5D, 0x0A, 0x05, 0x5E, 0x09, 0x05, 0x5F, 0x08, 0x42, 0x0E, 0x80, 0x01,
- 0x54, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C,
+ 0x4E, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C,
0x44, 0x0E, 0x1C, 0x06, 0x50, 0x06, 0x51, 0x06, 0x52, 0x06, 0x53, 0x06,
0x54, 0x06, 0x55, 0x06, 0x56, 0x06, 0x57, 0x06, 0x58, 0x06, 0x59, 0x06,
0x5A, 0x06, 0x5B, 0x06, 0x5C, 0x06, 0x5D, 0x06, 0x5E, 0x06, 0x5F, 0x44,
@@ -47,38 +46,38 @@
// 0x00000008: sub sp, sp, #36
// 0x0000000a: .cfi_def_cfa_offset: 128
// 0x0000000a: str r0, [sp, #0]
-// 0x0000000c: str.w r1, [sp, #132]
-// 0x00000010: vstr.f32 s0, [sp, #136]
-// 0x00000014: str.w r2, [sp, #140]
-// 0x00000018: str.w r3, [sp, #144]
-// 0x0000001c: sub sp, sp, #32
-// 0x0000001e: .cfi_def_cfa_offset: 160
-// 0x0000001e: add sp, sp, #32
-// 0x00000020: .cfi_def_cfa_offset: 128
-// 0x00000020: .cfi_remember_state
-// 0x00000020: add sp, sp, #36
-// 0x00000022: .cfi_def_cfa_offset: 92
-// 0x00000022: vpop.f32 {s16-s31}
-// 0x00000026: .cfi_def_cfa_offset: 28
-// 0x00000026: .cfi_restore_extended: r80
-// 0x00000026: .cfi_restore_extended: r81
-// 0x00000026: .cfi_restore_extended: r82
-// 0x00000026: .cfi_restore_extended: r83
-// 0x00000026: .cfi_restore_extended: r84
-// 0x00000026: .cfi_restore_extended: r85
-// 0x00000026: .cfi_restore_extended: r86
-// 0x00000026: .cfi_restore_extended: r87
-// 0x00000026: .cfi_restore_extended: r88
-// 0x00000026: .cfi_restore_extended: r89
-// 0x00000026: .cfi_restore_extended: r90
-// 0x00000026: .cfi_restore_extended: r91
-// 0x00000026: .cfi_restore_extended: r92
-// 0x00000026: .cfi_restore_extended: r93
-// 0x00000026: .cfi_restore_extended: r94
-// 0x00000026: .cfi_restore_extended: r95
-// 0x00000026: pop {r5, r6, r7, r8, r10, r11, pc}
-// 0x0000002a: .cfi_restore_state
-// 0x0000002a: .cfi_def_cfa_offset: 128
+// 0x0000000c: str r1, [sp, #132]
+// 0x0000000e: vstr.f32 s0, [sp, #136]
+// 0x00000012: str r2, [sp, #140]
+// 0x00000014: str r3, [sp, #144]
+// 0x00000016: sub sp, sp, #32
+// 0x00000018: .cfi_def_cfa_offset: 160
+// 0x00000018: add sp, sp, #32
+// 0x0000001a: .cfi_def_cfa_offset: 128
+// 0x0000001a: .cfi_remember_state
+// 0x0000001a: add sp, sp, #36
+// 0x0000001c: .cfi_def_cfa_offset: 92
+// 0x0000001c: vpop.f32 {s16-s31}
+// 0x00000020: .cfi_def_cfa_offset: 28
+// 0x00000020: .cfi_restore_extended: r80
+// 0x00000020: .cfi_restore_extended: r81
+// 0x00000020: .cfi_restore_extended: r82
+// 0x00000020: .cfi_restore_extended: r83
+// 0x00000020: .cfi_restore_extended: r84
+// 0x00000020: .cfi_restore_extended: r85
+// 0x00000020: .cfi_restore_extended: r86
+// 0x00000020: .cfi_restore_extended: r87
+// 0x00000020: .cfi_restore_extended: r88
+// 0x00000020: .cfi_restore_extended: r89
+// 0x00000020: .cfi_restore_extended: r90
+// 0x00000020: .cfi_restore_extended: r91
+// 0x00000020: .cfi_restore_extended: r92
+// 0x00000020: .cfi_restore_extended: r93
+// 0x00000020: .cfi_restore_extended: r94
+// 0x00000020: .cfi_restore_extended: r95
+// 0x00000020: pop {r5, r6, r7, r8, r10, r11, pc}
+// 0x00000024: .cfi_restore_state
+// 0x00000024: .cfi_def_cfa_offset: 128
static constexpr uint8_t expected_asm_kArm64[] = {
0xFF, 0x03, 0x03, 0xD1, 0xF3, 0x53, 0x06, 0xA9, 0xF5, 0x5B, 0x07, 0xA9,
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 353c729..4be7aae 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2325,7 +2325,7 @@
}
Register rn = ad.GetRegister();
- if (IsHighRegister(rn) && rn != SP && rn != PC) {
+ if (IsHighRegister(rn) && (byte || half || (rn != SP && rn != PC))) {
must_be_32bit = true;
}
@@ -2337,24 +2337,24 @@
// Immediate offset
int32_t offset = ad.GetOffset();
- // The 16 bit SP relative instruction can only have a 10 bit offset.
- if (rn == SP && offset >= (1 << 10)) {
- must_be_32bit = true;
- }
-
if (byte) {
// 5 bit offset, no shift.
- if (offset >= (1 << 5)) {
+ if ((offset & ~0x1f) != 0) {
must_be_32bit = true;
}
} else if (half) {
- // 6 bit offset, shifted by 1.
- if (offset >= (1 << 6)) {
+ // 5 bit offset, shifted by 1.
+ if ((offset & ~(0x1f << 1)) != 0) {
+ must_be_32bit = true;
+ }
+ } else if (rn == SP || rn == PC) {
+ // The 16 bit SP/PC relative instruction can only have an (imm8 << 2) offset.
+ if ((offset & ~(0xff << 2)) != 0) {
must_be_32bit = true;
}
} else {
- // 7 bit offset, shifted by 2.
- if (offset >= (1 << 7)) {
+ // 5 bit offset, shifted by 2.
+ if ((offset & ~(0x1f << 2)) != 0) {
must_be_32bit = true;
}
}
@@ -2370,7 +2370,7 @@
} else {
// 16 bit thumb1.
uint8_t opA = 0;
- bool sp_relative = false;
+ bool sp_or_pc_relative = false;
if (byte) {
opA = 7U /* 0b0111 */;
@@ -2379,7 +2379,10 @@
} else {
if (rn == SP) {
opA = 9U /* 0b1001 */;
- sp_relative = true;
+ sp_or_pc_relative = true;
+ } else if (rn == PC) {
+ opA = 4U;
+ sp_or_pc_relative = true;
} else {
opA = 6U /* 0b0110 */;
}
@@ -2388,7 +2391,7 @@
(load ? B11 : 0);
CHECK_GE(offset, 0);
- if (sp_relative) {
+ if (sp_or_pc_relative) {
// SP relative, 10 bit offset.
CHECK_LT(offset, (1 << 10));
CHECK_ALIGNED(offset, 4);
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index abb09f7..3ca3714 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -279,6 +279,148 @@
DriverStr(expected, "smull");
}
+TEST_F(AssemblerThumb2Test, LoadByteFromThumbOffset) {
+ arm::LoadOperandType type = arm::kLoadUnsignedByte;
+
+ __ LoadFromOffset(type, arm::R0, arm::R7, 0);
+ __ LoadFromOffset(type, arm::R1, arm::R7, 31);
+ __ LoadFromOffset(type, arm::R2, arm::R7, 32);
+ __ LoadFromOffset(type, arm::R3, arm::R7, 4095);
+ __ LoadFromOffset(type, arm::R4, arm::SP, 0);
+
+ const char* expected =
+ "ldrb r0, [r7, #0]\n"
+ "ldrb r1, [r7, #31]\n"
+ "ldrb.w r2, [r7, #32]\n"
+ "ldrb.w r3, [r7, #4095]\n"
+ "ldrb.w r4, [sp, #0]\n";
+ DriverStr(expected, "LoadByteFromThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreByteToThumbOffset) {
+ arm::StoreOperandType type = arm::kStoreByte;
+
+ __ StoreToOffset(type, arm::R0, arm::R7, 0);
+ __ StoreToOffset(type, arm::R1, arm::R7, 31);
+ __ StoreToOffset(type, arm::R2, arm::R7, 32);
+ __ StoreToOffset(type, arm::R3, arm::R7, 4095);
+ __ StoreToOffset(type, arm::R4, arm::SP, 0);
+
+ const char* expected =
+ "strb r0, [r7, #0]\n"
+ "strb r1, [r7, #31]\n"
+ "strb.w r2, [r7, #32]\n"
+ "strb.w r3, [r7, #4095]\n"
+ "strb.w r4, [sp, #0]\n";
+ DriverStr(expected, "StoreByteToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, LoadHalfFromThumbOffset) {
+ arm::LoadOperandType type = arm::kLoadUnsignedHalfword;
+
+ __ LoadFromOffset(type, arm::R0, arm::R7, 0);
+ __ LoadFromOffset(type, arm::R1, arm::R7, 62);
+ __ LoadFromOffset(type, arm::R2, arm::R7, 64);
+ __ LoadFromOffset(type, arm::R3, arm::R7, 4094);
+ __ LoadFromOffset(type, arm::R4, arm::SP, 0);
+ __ LoadFromOffset(type, arm::R5, arm::R7, 1); // Unaligned
+
+ const char* expected =
+ "ldrh r0, [r7, #0]\n"
+ "ldrh r1, [r7, #62]\n"
+ "ldrh.w r2, [r7, #64]\n"
+ "ldrh.w r3, [r7, #4094]\n"
+ "ldrh.w r4, [sp, #0]\n"
+ "ldrh.w r5, [r7, #1]\n";
+ DriverStr(expected, "LoadHalfFromThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreHalfToThumbOffset) {
+ arm::StoreOperandType type = arm::kStoreHalfword;
+
+ __ StoreToOffset(type, arm::R0, arm::R7, 0);
+ __ StoreToOffset(type, arm::R1, arm::R7, 62);
+ __ StoreToOffset(type, arm::R2, arm::R7, 64);
+ __ StoreToOffset(type, arm::R3, arm::R7, 4094);
+ __ StoreToOffset(type, arm::R4, arm::SP, 0);
+ __ StoreToOffset(type, arm::R5, arm::R7, 1); // Unaligned
+
+ const char* expected =
+ "strh r0, [r7, #0]\n"
+ "strh r1, [r7, #62]\n"
+ "strh.w r2, [r7, #64]\n"
+ "strh.w r3, [r7, #4094]\n"
+ "strh.w r4, [sp, #0]\n"
+ "strh.w r5, [r7, #1]\n";
+ DriverStr(expected, "StoreHalfToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, LoadWordFromSpPlusOffset) {
+ arm::LoadOperandType type = arm::kLoadWord;
+
+ __ LoadFromOffset(type, arm::R0, arm::SP, 0);
+ __ LoadFromOffset(type, arm::R1, arm::SP, 124);
+ __ LoadFromOffset(type, arm::R2, arm::SP, 128);
+ __ LoadFromOffset(type, arm::R3, arm::SP, 1020);
+ __ LoadFromOffset(type, arm::R4, arm::SP, 1024);
+ __ LoadFromOffset(type, arm::R5, arm::SP, 4092);
+ __ LoadFromOffset(type, arm::R6, arm::SP, 1); // Unaligned
+
+ const char* expected =
+ "ldr r0, [sp, #0]\n"
+ "ldr r1, [sp, #124]\n"
+ "ldr r2, [sp, #128]\n"
+ "ldr r3, [sp, #1020]\n"
+ "ldr.w r4, [sp, #1024]\n"
+ "ldr.w r5, [sp, #4092]\n"
+ "ldr.w r6, [sp, #1]\n";
+ DriverStr(expected, "LoadWordFromSpPlusOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordToSpPlusOffset) {
+ arm::StoreOperandType type = arm::kStoreWord;
+
+ __ StoreToOffset(type, arm::R0, arm::SP, 0);
+ __ StoreToOffset(type, arm::R1, arm::SP, 124);
+ __ StoreToOffset(type, arm::R2, arm::SP, 128);
+ __ StoreToOffset(type, arm::R3, arm::SP, 1020);
+ __ StoreToOffset(type, arm::R4, arm::SP, 1024);
+ __ StoreToOffset(type, arm::R5, arm::SP, 4092);
+ __ StoreToOffset(type, arm::R6, arm::SP, 1); // Unaligned
+
+ const char* expected =
+ "str r0, [sp, #0]\n"
+ "str r1, [sp, #124]\n"
+ "str r2, [sp, #128]\n"
+ "str r3, [sp, #1020]\n"
+ "str.w r4, [sp, #1024]\n"
+ "str.w r5, [sp, #4092]\n"
+ "str.w r6, [sp, #1]\n";
+ DriverStr(expected, "StoreWordToSpPlusOffset");
+}
+
+TEST_F(AssemblerThumb2Test, LoadWordFromPcPlusOffset) {
+ arm::LoadOperandType type = arm::kLoadWord;
+
+ __ LoadFromOffset(type, arm::R0, arm::PC, 0);
+ __ LoadFromOffset(type, arm::R1, arm::PC, 124);
+ __ LoadFromOffset(type, arm::R2, arm::PC, 128);
+ __ LoadFromOffset(type, arm::R3, arm::PC, 1020);
+ __ LoadFromOffset(type, arm::R4, arm::PC, 1024);
+ __ LoadFromOffset(type, arm::R5, arm::PC, 4092);
+ __ LoadFromOffset(type, arm::R6, arm::PC, 1); // Unaligned
+
+ const char* expected =
+ "ldr r0, [pc, #0]\n"
+ "ldr r1, [pc, #124]\n"
+ "ldr r2, [pc, #128]\n"
+ "ldr r3, [pc, #1020]\n"
+ "ldr.w r4, [pc, #1024]\n"
+ "ldr.w r5, [pc, #4092]\n"
+ "ldr.w r6, [pc, #1]\n";
+ DriverStr(expected, "LoadWordFromPcPlusOffset");
+}
+
TEST_F(AssemblerThumb2Test, StoreWordToThumbOffset) {
arm::StoreOperandType type = arm::kStoreWord;
int32_t offset = 4092;