Diffstat (limited to 'compiler')
29 files changed, 282 insertions(+), 185 deletions(-)
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index 5bf77aae55..303ea3e306 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -35,14 +35,15 @@ namespace art { * r16-r17: Also known as ip0-ip1, respectively. Used as scratch registers by * the linker, by the trampolines and other stubs (the backend uses * these as temporary registers). - * r18 : (rxSELF) is reserved (pointer to thread-local storage). - * r19-r29: Callee save registers (promotion targets). + * r18 : Caller save register (used as temporary register). + * r19 : (rxSELF) is reserved (pointer to thread-local storage). + * r20-r29: Callee save registers (promotion targets). * r30 : (lr) is reserved (the link register). * rsp : (sp) is reserved (the stack pointer). * rzr : (zr) is reserved (the zero register). * - * 18 core temps that codegen can use (r0-r17). - * 10 core registers that can be used for promotion. + * 19 core temps that codegen can use (r0-r18). + * 9 core registers that can be used for promotion. * * Floating-point registers * v0-v31 @@ -145,7 +146,7 @@ enum A64NativeRegisterPool { // private marker to avoid generate-operator-out.p // Aliases which are not defined in "ARM Architecture Reference, register names". rxIP0 = rx16, rxIP1 = rx17, - rxSELF = rx18, + rxSELF = rx19, rxLR = rx30, /* * FIXME: It's a bit awkward to define both 32 and 64-bit views of these - we'll only ever use @@ -154,7 +155,7 @@ enum A64NativeRegisterPool { // private marker to avoid generate-operator-out.p */ rwIP0 = rw16, rwIP1 = rw17, - rwSELF = rw18, + rwSELF = rw19, rwLR = rw30, }; diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index fc32ecd955..fe15391e2c 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -51,19 +51,17 @@ static constexpr RegStorage dp_regs_arr[] = rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15, rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23, rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31}; -// Note: we are not able to call to C function since rs_xSELF is a special register need to be -// preserved but would be scratched by native functions follow aapcs64. 
static constexpr RegStorage reserved_regs_arr[] = {rs_wSELF, rs_wsp, rs_wLR, rs_wzr}; static constexpr RegStorage reserved64_regs_arr[] = {rs_xSELF, rs_sp, rs_xLR, rs_xzr}; static constexpr RegStorage core_temps_arr[] = {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7, rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16, - rs_w17}; + rs_w17, rs_w18}; static constexpr RegStorage core64_temps_arr[] = {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7, rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, rs_x16, - rs_x17}; + rs_x17, rs_x18}; static constexpr RegStorage sp_temps_arr[] = {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7, rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23, @@ -691,6 +689,7 @@ void Arm64Mir2Lir::ClobberCallerSave() { Clobber(rs_x15); Clobber(rs_x16); Clobber(rs_x17); + Clobber(rs_x18); Clobber(rs_x30); Clobber(rs_f0); diff --git a/compiler/dex/quick/quick_cfi_test_expected.inc b/compiler/dex/quick/quick_cfi_test_expected.inc index 48109d2f44..634fdeead0 100644 --- a/compiler/dex/quick/quick_cfi_test_expected.inc +++ b/compiler/dex/quick/quick_cfi_test_expected.inc @@ -33,15 +33,15 @@ static constexpr uint8_t expected_cfi_kThumb2[] = { // 0x00000014: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kArm64[] = { - 0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF3, 0xD3, 0x02, 0xA9, + 0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF4, 0xD7, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9, 0xE0, 0x03, 0x00, 0xB9, 0xE8, 0xA7, 0x41, 0x6D, - 0xF3, 0xD3, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, + 0xF4, 0xD7, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, }; static constexpr uint8_t expected_cfi_kArm64[] = { - 0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x93, - 0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06, - 0x49, 0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, + 0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x94, + 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06, + 0x49, 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; // 0x00000000: sub sp, sp, #0x40 (64) @@ -49,9 +49,9 @@ static constexpr uint8_t expected_cfi_kArm64[] = { // 0x00000004: stp d8, d9, [sp, #24] // 0x00000008: .cfi_offset_extended: r72 at cfa-40 // 0x00000008: .cfi_offset_extended: r73 at cfa-32 -// 0x00000008: stp x19, x20, [sp, #40] -// 0x0000000c: .cfi_offset: r19 at cfa-24 -// 0x0000000c: .cfi_offset: r20 at cfa-16 +// 0x00000008: stp x20, x21, [sp, #40] +// 0x0000000c: .cfi_offset: r20 at cfa-24 +// 0x0000000c: .cfi_offset: r21 at cfa-16 // 0x0000000c: str lr, [sp, #56] // 0x00000010: .cfi_offset: r30 at cfa-8 // 0x00000010: str w0, [sp] @@ -59,9 +59,9 @@ static constexpr uint8_t expected_cfi_kArm64[] = { // 0x00000014: ldp d8, d9, [sp, #24] // 0x00000018: .cfi_restore_extended: r72 // 0x00000018: .cfi_restore_extended: r73 -// 0x00000018: ldp x19, x20, [sp, #40] -// 0x0000001c: .cfi_restore: r19 +// 0x00000018: ldp x20, x21, [sp, #40] // 0x0000001c: .cfi_restore: r20 +// 0x0000001c: .cfi_restore: r21 // 0x0000001c: ldr lr, [sp, #56] // 0x00000020: .cfi_restore: r30 // 0x00000020: add sp, sp, #0x40 (64) diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 4945a91ff8..f988812d73 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -75,8 +75,8 @@ namespace art { static constexpr 
bool kTimeCompileMethod = !kIsDebugBuild; -// Whether to produce 64-bit ELF files for 64-bit targets. -static constexpr bool kProduce64BitELFFiles = true; +// Whether to produce 64-bit ELF files for 64-bit targets. Leave this off for now. +static constexpr bool kProduce64BitELFFiles = false; // Whether classes-to-compile and methods-to-compile are only applied to the boot image, or, when // given, too all compilations. diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 195949bf3c..bf32febabe 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -1006,7 +1006,7 @@ bool ImageWriter::CopyAndFixupIfDexCacheFieldArray(mirror::Object* dst, mirror:: // Fixup int pointers for the field array. CHECK(!arr->IsObjectArray()); const size_t num_elements = arr->GetLength(); - if (target_ptr_size_ == 4) { + if (target_ptr_size_ == 4u) { // Will get fixed up by fixup object. dst->SetClass(down_cast<mirror::Class*>( GetImageAddress(mirror::IntArray::GetArrayClass()))); @@ -1026,10 +1026,11 @@ bool ImageWriter::CopyAndFixupIfDexCacheFieldArray(mirror::Object* dst, mirror:: CHECK(it2 != art_field_reloc_.end()) << "No relocation for field " << PrettyField(field); fixup_location = image_begin_ + it2->second; } - if (target_ptr_size_ == 4) { + if (target_ptr_size_ == 4u) { down_cast<mirror::IntArray*>(dest_array)->SetWithoutChecks<kVerifyNone>( i, static_cast<uint32_t>(reinterpret_cast<uint64_t>(fixup_location))); } else { + DCHECK_EQ(target_ptr_size_, 8u); down_cast<mirror::LongArray*>(dest_array)->SetWithoutChecks<kVerifyNone>( i, reinterpret_cast<uint64_t>(fixup_location)); } diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc index eaf7872a05..42fc30fb9e 100644 --- a/compiler/jni/jni_cfi_test_expected.inc +++ b/compiler/jni/jni_cfi_test_expected.inc @@ -84,14 +84,13 @@ static constexpr uint8_t expected_asm_kArm64[] = { 0xFF, 0x03, 0x03, 0xD1, 0xF3, 0x53, 0x06, 0xA9, 0xF5, 0x5B, 0x07, 0xA9, 0xF7, 0x63, 0x08, 0xA9, 0xF9, 0x6B, 0x09, 0xA9, 0xFB, 0x73, 0x0A, 0xA9, 0xFD, 0x7B, 0x0B, 0xA9, 0xE8, 0x27, 0x02, 0x6D, 0xEA, 0x2F, 0x03, 0x6D, - 0xEC, 0x37, 0x04, 0x6D, 0xEE, 0x3F, 0x05, 0x6D, 0xF5, 0x03, 0x12, 0xAA, - 0xE0, 0x03, 0x00, 0xB9, 0xE1, 0xC7, 0x00, 0xB9, 0xE0, 0xCB, 0x00, 0xBD, - 0xE2, 0xCF, 0x00, 0xB9, 0xE3, 0xD3, 0x00, 0xB9, 0xFF, 0x83, 0x00, 0xD1, - 0xFF, 0x83, 0x00, 0x91, 0xF2, 0x03, 0x15, 0xAA, 0xF3, 0x53, 0x46, 0xA9, - 0xF5, 0x5B, 0x47, 0xA9, 0xF7, 0x63, 0x48, 0xA9, 0xF9, 0x6B, 0x49, 0xA9, - 0xFB, 0x73, 0x4A, 0xA9, 0xFD, 0x7B, 0x4B, 0xA9, 0xE8, 0x27, 0x42, 0x6D, - 0xEA, 0x2F, 0x43, 0x6D, 0xEC, 0x37, 0x44, 0x6D, 0xEE, 0x3F, 0x45, 0x6D, - 0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6, + 0xEC, 0x37, 0x04, 0x6D, 0xEE, 0x3F, 0x05, 0x6D, 0xE0, 0x03, 0x00, 0xB9, + 0xE1, 0xC7, 0x00, 0xB9, 0xE0, 0xCB, 0x00, 0xBD, 0xE2, 0xCF, 0x00, 0xB9, + 0xE3, 0xD3, 0x00, 0xB9, 0xFF, 0x83, 0x00, 0xD1, 0xFF, 0x83, 0x00, 0x91, + 0xF3, 0x53, 0x46, 0xA9, 0xF5, 0x5B, 0x47, 0xA9, 0xF7, 0x63, 0x48, 0xA9, + 0xF9, 0x6B, 0x49, 0xA9, 0xFB, 0x73, 0x4A, 0xA9, 0xFD, 0x7B, 0x4B, 0xA9, + 0xE8, 0x27, 0x42, 0x6D, 0xEA, 0x2F, 0x43, 0x6D, 0xEC, 0x37, 0x44, 0x6D, + 0xEE, 0x3F, 0x45, 0x6D, 0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6, }; static constexpr uint8_t expected_cfi_kArm64[] = { 0x44, 0x0E, 0xC0, 0x01, 0x44, 0x93, 0x18, 0x94, 0x16, 0x44, 0x95, 0x14, @@ -99,11 +98,11 @@ static constexpr uint8_t expected_cfi_kArm64[] = { 0x44, 0x9B, 0x08, 0x9C, 0x06, 0x44, 0x9D, 0x04, 0x9E, 0x02, 0x44, 0x05, 0x48, 0x28, 0x05, 0x49, 0x26, 0x44, 0x05, 0x4A, 0x24, 0x05, 0x4B, 0x22, 
0x44, 0x05, 0x4C, 0x20, 0x05, 0x4D, 0x1E, 0x44, 0x05, 0x4E, 0x1C, 0x05, - 0x4F, 0x1A, 0x5C, 0x0E, 0xE0, 0x01, 0x44, 0x0E, 0xC0, 0x01, 0x44, 0x0A, - 0x44, 0xD3, 0xD4, 0x44, 0xD5, 0xD6, 0x44, 0xD7, 0xD8, 0x44, 0xD9, 0xDA, - 0x44, 0xDB, 0xDC, 0x44, 0xDD, 0xDE, 0x44, 0x06, 0x48, 0x06, 0x49, 0x44, - 0x06, 0x4A, 0x06, 0x4B, 0x44, 0x06, 0x4C, 0x06, 0x4D, 0x44, 0x06, 0x4E, - 0x06, 0x4F, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01, + 0x4F, 0x1A, 0x58, 0x0E, 0xE0, 0x01, 0x44, 0x0E, 0xC0, 0x01, 0x0A, 0x44, + 0xD3, 0xD4, 0x44, 0xD5, 0xD6, 0x44, 0xD7, 0xD8, 0x44, 0xD9, 0xDA, 0x44, + 0xDB, 0xDC, 0x44, 0xDD, 0xDE, 0x44, 0x06, 0x48, 0x06, 0x49, 0x44, 0x06, + 0x4A, 0x06, 0x4B, 0x44, 0x06, 0x4C, 0x06, 0x4D, 0x44, 0x06, 0x4E, 0x06, + 0x4F, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01, }; // 0x00000000: sub sp, sp, #0xc0 (192) // 0x00000004: .cfi_def_cfa_offset: 192 @@ -137,53 +136,51 @@ static constexpr uint8_t expected_cfi_kArm64[] = { // 0x00000028: stp d14, d15, [sp, #80] // 0x0000002c: .cfi_offset_extended: r78 at cfa-112 // 0x0000002c: .cfi_offset_extended: r79 at cfa-104 -// 0x0000002c: mov x21, tr -// 0x00000030: str w0, [sp] -// 0x00000034: str w1, [sp, #196] -// 0x00000038: str s0, [sp, #200] -// 0x0000003c: str w2, [sp, #204] -// 0x00000040: str w3, [sp, #208] -// 0x00000044: sub sp, sp, #0x20 (32) -// 0x00000048: .cfi_def_cfa_offset: 224 -// 0x00000048: add sp, sp, #0x20 (32) -// 0x0000004c: .cfi_def_cfa_offset: 192 -// 0x0000004c: mov tr, x21 -// 0x00000050: .cfi_remember_state -// 0x00000050: ldp x19, x20, [sp, #96] -// 0x00000054: .cfi_restore: r19 -// 0x00000054: .cfi_restore: r20 -// 0x00000054: ldp x21, x22, [sp, #112] -// 0x00000058: .cfi_restore: r21 -// 0x00000058: .cfi_restore: r22 -// 0x00000058: ldp x23, x24, [sp, #128] -// 0x0000005c: .cfi_restore: r23 -// 0x0000005c: .cfi_restore: r24 -// 0x0000005c: ldp x25, x26, [sp, #144] -// 0x00000060: .cfi_restore: r25 -// 0x00000060: .cfi_restore: r26 -// 0x00000060: ldp x27, x28, [sp, #160] -// 0x00000064: .cfi_restore: r27 -// 0x00000064: .cfi_restore: r28 -// 0x00000064: ldp x29, lr, [sp, #176] -// 0x00000068: .cfi_restore: r29 -// 0x00000068: .cfi_restore: r30 -// 0x00000068: ldp d8, d9, [sp, #32] -// 0x0000006c: .cfi_restore_extended: r72 -// 0x0000006c: .cfi_restore_extended: r73 -// 0x0000006c: ldp d10, d11, [sp, #48] -// 0x00000070: .cfi_restore_extended: r74 -// 0x00000070: .cfi_restore_extended: r75 -// 0x00000070: ldp d12, d13, [sp, #64] -// 0x00000074: .cfi_restore_extended: r76 -// 0x00000074: .cfi_restore_extended: r77 -// 0x00000074: ldp d14, d15, [sp, #80] -// 0x00000078: .cfi_restore_extended: r78 -// 0x00000078: .cfi_restore_extended: r79 -// 0x00000078: add sp, sp, #0xc0 (192) -// 0x0000007c: .cfi_def_cfa_offset: 0 -// 0x0000007c: ret -// 0x00000080: .cfi_restore_state -// 0x00000080: .cfi_def_cfa_offset: 192 +// 0x0000002c: str w0, [sp] +// 0x00000030: str w1, [sp, #196] +// 0x00000034: str s0, [sp, #200] +// 0x00000038: str w2, [sp, #204] +// 0x0000003c: str w3, [sp, #208] +// 0x00000040: sub sp, sp, #0x20 (32) +// 0x00000044: .cfi_def_cfa_offset: 224 +// 0x00000044: add sp, sp, #0x20 (32) +// 0x00000048: .cfi_def_cfa_offset: 192 +// 0x00000048: .cfi_remember_state +// 0x00000048: ldp x19, x20, [sp, #96] +// 0x0000004c: .cfi_restore: r19 +// 0x0000004c: .cfi_restore: r20 +// 0x0000004c: ldp x21, x22, [sp, #112] +// 0x00000050: .cfi_restore: r21 +// 0x00000050: .cfi_restore: r22 +// 0x00000050: ldp x23, x24, [sp, #128] +// 0x00000054: .cfi_restore: r23 +// 0x00000054: .cfi_restore: r24 +// 0x00000054: ldp x25, x26, 
[sp, #144] +// 0x00000058: .cfi_restore: r25 +// 0x00000058: .cfi_restore: r26 +// 0x00000058: ldp x27, x28, [sp, #160] +// 0x0000005c: .cfi_restore: r27 +// 0x0000005c: .cfi_restore: r28 +// 0x0000005c: ldp x29, lr, [sp, #176] +// 0x00000060: .cfi_restore: r29 +// 0x00000060: .cfi_restore: r30 +// 0x00000060: ldp d8, d9, [sp, #32] +// 0x00000064: .cfi_restore_extended: r72 +// 0x00000064: .cfi_restore_extended: r73 +// 0x00000064: ldp d10, d11, [sp, #48] +// 0x00000068: .cfi_restore_extended: r74 +// 0x00000068: .cfi_restore_extended: r75 +// 0x00000068: ldp d12, d13, [sp, #64] +// 0x0000006c: .cfi_restore_extended: r76 +// 0x0000006c: .cfi_restore_extended: r77 +// 0x0000006c: ldp d14, d15, [sp, #80] +// 0x00000070: .cfi_restore_extended: r78 +// 0x00000070: .cfi_restore_extended: r79 +// 0x00000070: add sp, sp, #0xc0 (192) +// 0x00000074: .cfi_def_cfa_offset: 0 +// 0x00000074: ret +// 0x00000078: .cfi_restore_state +// 0x00000078: .cfi_def_cfa_offset: 192 static constexpr uint8_t expected_asm_kX86[] = { 0x57, 0x56, 0x55, 0x83, 0xC4, 0xE4, 0x50, 0x89, 0x4C, 0x24, 0x34, 0xF3, diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index 4344c90b98..8e7fd2b95f 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -184,7 +184,7 @@ uint32_t Arm64JniCallingConvention::CoreSpillMask() const { // Jni function is the native function which the java code wants to call. // Jni method is the method that compiled by jni compiler. // Call chain: managed code(java) --> jni method --> jni function. - // Thread register(X18, scratched by aapcs64) is not saved on stack, it is saved in ETR(X21). + // Thread register(X19) is saved on stack. 
return 1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 | 1 << X25 | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR; } diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index c4eaabf899..49a0444f94 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -711,8 +711,8 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; } else { clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; - HLoadClass* load_class = - new (arena_) HLoadClass(storage_index, is_referrer_class, dex_pc); + HLoadClass* load_class = new (arena_) HLoadClass( + storage_index, *dex_compilation_unit_->GetDexFile(), is_referrer_class, dex_pc); current_block_->AddInstruction(load_class); clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); current_block_->AddInstruction(clinit_check); @@ -915,7 +915,8 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, *outer_compilation_unit_->GetDexFile(), storage_index); bool is_initialized = resolved_field->GetDeclaringClass()->IsInitialized() && is_in_dex_cache; - HLoadClass* constant = new (arena_) HLoadClass(storage_index, is_referrer_class, dex_pc); + HLoadClass* constant = new (arena_) HLoadClass( + storage_index, *dex_compilation_unit_->GetDexFile(), is_referrer_class, dex_pc); current_block_->AddInstruction(constant); HInstruction* cls = constant; @@ -1151,7 +1152,10 @@ bool HGraphBuilder::BuildTypeCheck(const Instruction& instruction, } HInstruction* object = LoadLocal(reference, Primitive::kPrimNot); HLoadClass* cls = new (arena_) HLoadClass( - type_index, IsOutermostCompilingClass(type_index), dex_pc); + type_index, + *dex_compilation_unit_->GetDexFile(), + IsOutermostCompilingClass(type_index), + dex_pc); current_block_->AddInstruction(cls); // The class needs a temporary before being used by the type check. Temporaries temps(graph_); @@ -1976,7 +1980,8 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 ? 
kQuickAllocObjectWithAccessCheck : kQuickAllocObject; - current_block_->AddInstruction(new (arena_) HNewInstance(dex_pc, type_index, entrypoint)); + current_block_->AddInstruction(new (arena_) HNewInstance( + dex_pc, type_index, *dex_compilation_unit_->GetDexFile(), entrypoint)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } break; @@ -2161,8 +2166,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 MaybeRecordStat(MethodCompilationStat::kNotCompiledCantAccesType); return false; } - current_block_->AddInstruction( - new (arena_) HLoadClass(type_index, IsOutermostCompilingClass(type_index), dex_pc)); + current_block_->AddInstruction(new (arena_) HLoadClass( + type_index, + *dex_compilation_unit_->GetDexFile(), + IsOutermostCompilingClass(type_index), + dex_pc)); UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction()); break; } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 5f8023de2a..7d26a3c9ae 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -2878,7 +2878,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { Location left = locations->InAt(0); Location right = locations->InAt(1); - Label less, greater, done; + NearLabel less, greater, done; Primitive::Type type = compare->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: { @@ -2974,7 +2974,7 @@ void InstructionCodeGeneratorARM::GenerateWideAtomicStore(Register addr, Register temp1, Register temp2, HInstruction* instruction) { - Label fail; + NearLabel fail; if (offset != 0) { __ LoadImmediate(temp1, offset); __ add(IP, addr, ShifterOperand(temp1)); @@ -3654,7 +3654,7 @@ void CodeGeneratorARM::MarkGCCard(Register temp, Register object, Register value, bool can_be_null) { - Label is_null; + NearLabel is_null; if (can_be_null) { __ CompareAndBranchIfZero(value, &is_null); } @@ -4076,14 +4076,13 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { Register cls = locations->InAt(1).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - Label done, zero; + NearLabel done, zero; SlowPathCodeARM* slow_path = nullptr; // Return 0 if `obj` is null. // avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ cmp(obj, ShifterOperand(0)); - __ b(&zero, EQ); + __ CompareAndBranchIfZero(obj, &zero); } // Compare the class of `obj` with `cls`. __ LoadFromOffset(kLoadWord, out, obj, class_offset); @@ -4134,16 +4133,19 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); + NearLabel done; // avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ cmp(obj, ShifterOperand(0)); - __ b(slow_path->GetExitLabel(), EQ); + __ CompareAndBranchIfZero(obj, &done); } // Compare the class of `obj` with `cls`. 
__ LoadFromOffset(kLoadWord, temp, obj, class_offset); __ cmp(temp, ShifterOperand(cls)); __ b(slow_path->GetEntryLabel(), NE); __ Bind(slow_path->GetExitLabel()); + if (instruction->MustDoNullCheck()) { + __ Bind(&done); + } } void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index b56ca10874..ab793a5c92 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -44,7 +44,7 @@ static const vixl::FPRegister kParameterFPRegisters[] = { }; static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters); -const vixl::Register tr = vixl::x18; // Thread Register +const vixl::Register tr = vixl::x19; // Thread Register static const vixl::Register kArtMethodRegister = vixl::w0; // Method register on invoke. const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1); @@ -52,10 +52,10 @@ const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31); const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr); -// Callee-saved registers defined by AAPCS64. +// Callee-saved registers AAPCS64 (without x19 - Thread Register) const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister, vixl::kXRegSize, - vixl::x19.code(), + vixl::x20.code(), vixl::x30.code()); const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister, vixl::kDRegSize, diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 47c6318c95..a72817fade 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -141,7 +141,6 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, } if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, can_use_dex_cache)) { - resolved_method->SetShouldNotInline(); return false; } @@ -208,6 +207,7 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, if (!builder.BuildGraph(*code_item)) { VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " could not be built, so cannot be inlined"; + resolved_method->SetShouldNotInline(); return false; } @@ -215,12 +215,14 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, compiler_driver_->GetInstructionSet())) { VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " cannot be inlined because of the register allocator"; + resolved_method->SetShouldNotInline(); return false; } if (!callee_graph->TryBuildingSsa()) { VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " could not be transformed to SSA"; + resolved_method->SetShouldNotInline(); return false; } @@ -257,6 +259,7 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, if (block->IsLoopHeader()) { VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " could not be inlined because it contains a loop"; + resolved_method->SetShouldNotInline(); return false; } @@ -272,6 +275,7 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " could not be inlined because " << current->DebugName() << " can throw"; + resolved_method->SetShouldNotInline(); return false; } @@ -279,6 +283,7 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, VLOG(compiler) << "Method " << PrettyMethod(method_index, 
caller_dex_file) << " could not be inlined because " << current->DebugName() << " needs an environment"; + resolved_method->SetShouldNotInline(); return false; } @@ -286,6 +291,8 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " could not be inlined because " << current->DebugName() << " it is in a different dex file and requires access to the dex cache"; + // Do not flag the method as not-inlineable. A caller within the same + // dex file could still successfully inline it. return false; } } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 9e18f114ad..8ef13e125e 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -327,9 +327,6 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) { // TODO: Refactor DexFileMethodInliner and have something nicer than InlineMethod. void IntrinsicsRecognizer::Run() { - DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(dex_file_); - DCHECK(inliner != nullptr); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); @@ -338,6 +335,9 @@ void IntrinsicsRecognizer::Run() { if (inst->IsInvoke()) { HInvoke* invoke = inst->AsInvoke(); InlineMethod method; + DexFileMethodInliner* inliner = + driver_->GetMethodInlinerMap()->GetMethodInliner(&invoke->GetDexFile()); + DCHECK(inliner != nullptr); if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) { Intrinsics intrinsic = GetIntrinsic(method); @@ -345,7 +345,7 @@ void IntrinsicsRecognizer::Run() { if (!CheckInvokeType(intrinsic, invoke)) { LOG(WARNING) << "Found an intrinsic with unexpected invoke type: " << intrinsic << " for " - << PrettyMethod(invoke->GetDexMethodIndex(), *dex_file_); + << PrettyMethod(invoke->GetDexMethodIndex(), invoke->GetDexFile()); } else { invoke->SetIntrinsic(intrinsic); } diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index c243ef3f8b..741fb64fd8 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -30,16 +30,15 @@ class DexFile; // Recognize intrinsics from HInvoke nodes. class IntrinsicsRecognizer : public HOptimization { public: - IntrinsicsRecognizer(HGraph* graph, const DexFile* dex_file, CompilerDriver* driver) + IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver) : HOptimization(graph, true, kIntrinsicsRecognizerPassName), - dex_file_(dex_file), driver_(driver) {} + driver_(driver) {} void Run() OVERRIDE; static constexpr const char* kIntrinsicsRecognizerPassName = "intrinsics_recognition"; private: - const DexFile* dex_file_; CompilerDriver* driver_; DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 91daeb7a4c..483c09e5a9 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1291,11 +1291,16 @@ void HGraph::DeleteDeadBlock(HBasicBlock* block) { block->RemovePhi(it.Current()->AsPhi()); } + if (block->IsExitBlock()) { + exit_block_ = nullptr; + } + reverse_post_order_.Delete(block); blocks_.Put(block->GetBlockId(), nullptr); } void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { + DCHECK(HasExitBlock()) << "Unimplemented scenario"; if (GetBlocks().Size() == 3) { // Simple case of an entry block, a body block, and an exit block. 
// Put the body block's instruction into `invoke`'s block. diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 12ace413b7..01870c36fa 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -155,6 +155,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { HBasicBlock* GetEntryBlock() const { return entry_block_; } HBasicBlock* GetExitBlock() const { return exit_block_; } + bool HasExitBlock() const { return exit_block_ != nullptr; } void SetEntryBlock(HBasicBlock* block) { entry_block_ = block; } void SetExitBlock(HBasicBlock* block) { exit_block_ = block; } @@ -2387,6 +2388,7 @@ class HInvoke : public HInstruction { uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint32_t GetDexMethodIndex() const { return dex_method_index_; } + const DexFile& GetDexFile() const { return GetEnvironment()->GetDexFile(); } InvokeType GetOriginalInvokeType() const { return original_invoke_type_; } @@ -2598,14 +2600,19 @@ class HInvokeInterface : public HInvoke { class HNewInstance : public HExpression<0> { public: - HNewInstance(uint32_t dex_pc, uint16_t type_index, QuickEntrypointEnum entrypoint) + HNewInstance(uint32_t dex_pc, + uint16_t type_index, + const DexFile& dex_file, + QuickEntrypointEnum entrypoint) : HExpression(Primitive::kPrimNot, SideEffects::None()), dex_pc_(dex_pc), type_index_(type_index), + dex_file_(dex_file), entrypoint_(entrypoint) {} uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } + const DexFile& GetDexFile() const { return dex_file_; } // Calls runtime so needs an environment. bool NeedsEnvironment() const OVERRIDE { return true; } @@ -2624,6 +2631,7 @@ class HNewInstance : public HExpression<0> { private: const uint32_t dex_pc_; const uint16_t type_index_; + const DexFile& dex_file_; const QuickEntrypointEnum entrypoint_; DISALLOW_COPY_AND_ASSIGN(HNewInstance); @@ -3428,10 +3436,12 @@ class HSuspendCheck : public HTemplateInstruction<0> { class HLoadClass : public HExpression<0> { public: HLoadClass(uint16_t type_index, + const DexFile& dex_file, bool is_referrers_class, uint32_t dex_pc) : HExpression(Primitive::kPrimNot, SideEffects::None()), type_index_(type_index), + dex_file_(dex_file), is_referrers_class_(is_referrers_class), dex_pc_(dex_pc), generate_clinit_check_(false), @@ -3487,12 +3497,15 @@ class HLoadClass : public HExpression<0> { return loaded_class_rti_.IsExact(); } + const DexFile& GetDexFile() { return dex_file_; } + bool NeedsDexCache() const OVERRIDE { return !is_referrers_class_; } DECLARE_INSTRUCTION(LoadClass); private: const uint16_t type_index_; + const DexFile& dex_file_; const bool is_referrers_class_; const uint32_t dex_pc_; // Whether this instruction must generate the initialization check. 
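With HNewInstance and HLoadClass now carrying a reference to their originating DexFile (and HInvoke exposing one through its environment), optimization passes can resolve types per instruction instead of capturing a pass-level dex_file_. A minimal sketch of that lookup pattern, mirroring the ReferenceTypePropagation change further down in this diff; it assumes the ART runtime/compiler headers, that the caller already holds the mutator lock (e.g. via ScopedObjectAccess), and the function name is illustrative only:

  static mirror::Class* ResolveClassFor(HLoadClass* instr) {
    // Find the dex cache from the instruction's own dex file rather than from a
    // dex_file_ member captured when the pass was constructed.
    mirror::DexCache* dex_cache =
        Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile());
    // Assumes the verifier already populated the dex cache, as the pass does.
    return dex_cache->GetResolvedType(instr->GetTypeIndex());
  }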
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 2125f6eb01..ecb3b0a3a2 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -32,20 +32,20 @@ static constexpr uint8_t expected_cfi_kThumb2[] = { // 0x00000012: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kArm64[] = { - 0xE0, 0x0F, 0x1C, 0xB8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9, - 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF3, 0xD3, 0x42, 0xA9, + 0xE0, 0x0F, 0x1C, 0xB8, 0xF4, 0xD7, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9, + 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0xD7, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, }; static constexpr uint8_t expected_cfi_kArm64[] = { - 0x44, 0x0E, 0x40, 0x44, 0x93, 0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44, + 0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49, - 0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, + 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; // 0x00000000: str w0, [sp, #-64]! // 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: stp x19, x20, [sp, #40] -// 0x00000008: .cfi_offset: r19 at cfa-24 -// 0x00000008: .cfi_offset: r20 at cfa-16 +// 0x00000004: stp x20, x21, [sp, #40] +// 0x00000008: .cfi_offset: r20 at cfa-24 +// 0x00000008: .cfi_offset: r21 at cfa-16 // 0x00000008: str lr, [sp, #56] // 0x0000000c: .cfi_offset: r30 at cfa-8 // 0x0000000c: stp d8, d9, [sp, #24] @@ -55,9 +55,9 @@ static constexpr uint8_t expected_cfi_kArm64[] = { // 0x00000010: ldp d8, d9, [sp, #24] // 0x00000014: .cfi_restore_extended: r72 // 0x00000014: .cfi_restore_extended: r73 -// 0x00000014: ldp x19, x20, [sp, #40] -// 0x00000018: .cfi_restore: r19 +// 0x00000014: ldp x20, x21, [sp, #40] // 0x00000018: .cfi_restore: r20 +// 0x00000018: .cfi_restore: r21 // 0x00000018: ldr lr, [sp, #56] // 0x0000001c: .cfi_restore: r30 // 0x0000001c: add sp, sp, #0x40 (64) diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index b2e8ecd74a..fa3c310811 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -223,7 +223,6 @@ class OptimizingCompiler FINAL : public Compiler { CompiledMethod* CompileOptimized(HGraph* graph, CodeGenerator* codegen, CompilerDriver* driver, - const DexFile& dex_file, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info) const; @@ -316,7 +315,6 @@ static void RunOptimizations(HOptimization* optimizations[], static void RunOptimizations(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats, - const DexFile& dex_file, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { @@ -335,10 +333,10 @@ static void RunOptimizations(HGraph* graph, GVNOptimization gvn(graph, side_effects); LICM licm(graph, side_effects); BoundsCheckElimination bce(graph); - ReferenceTypePropagation type_propagation(graph, dex_file, dex_compilation_unit, handles); + ReferenceTypePropagation type_propagation(graph, handles); InstructionSimplifier simplify2(graph, stats, "instruction_simplifier_after_types"); - IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver); + IntrinsicsRecognizer intrinsics(graph, driver); HOptimization* 
optimizations[] = { &intrinsics, @@ -391,12 +389,11 @@ static void AllocateRegisters(HGraph* graph, CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CodeGenerator* codegen, CompilerDriver* compiler_driver, - const DexFile& dex_file, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer) const { StackHandleScopeCollection handles(Thread::Current()); RunOptimizations(graph, compiler_driver, compilation_stats_.get(), - dex_file, dex_compilation_unit, pass_info_printer, &handles); + dex_compilation_unit, pass_info_printer, &handles); AllocateRegisters(graph, codegen, pass_info_printer); @@ -585,7 +582,6 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite return CompileOptimized(graph, codegen.get(), compiler_driver, - dex_file, dex_compilation_unit, &pass_info_printer); } else if (shouldOptimize && can_allocate_registers) { diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 12b1c2b9bd..601b48aafa 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -161,7 +161,8 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) { ScopedObjectAccess soa(Thread::Current()); - mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_); + mirror::DexCache* dex_cache = + Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile()); // Get type from dex cache assuming it was populated by the verifier. mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex()); if (resolved_class != nullptr) { @@ -172,7 +173,8 @@ void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) { void ReferenceTypePropagation::VisitLoadClass(HLoadClass* instr) { ScopedObjectAccess soa(Thread::Current()); - mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_); + mirror::DexCache* dex_cache = + Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile()); // Get type from dex cache assuming it was populated by the verifier. 
mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex()); if (resolved_class != nullptr) { diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 733e18e689..b68fc67579 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -30,13 +30,8 @@ namespace art { */ class ReferenceTypePropagation : public HOptimization { public: - ReferenceTypePropagation(HGraph* graph, - const DexFile& dex_file, - const DexCompilationUnit& dex_compilation_unit, - StackHandleScopeCollection* handles) + ReferenceTypePropagation(HGraph* graph, StackHandleScopeCollection* handles) : HOptimization(graph, true, kReferenceTypePropagationPassName), - dex_file_(dex_file), - dex_compilation_unit_(dex_compilation_unit), handles_(handles), worklist_(graph->GetArena(), kDefaultWorklistSize) {} @@ -66,8 +61,6 @@ class ReferenceTypePropagation : public HOptimization { ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - const DexFile& dex_file_; - const DexCompilationUnit& dex_compilation_unit_; StackHandleScopeCollection* handles_; GrowableArray<HInstruction*> worklist_; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index b446815770..5663e3973d 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -144,14 +144,17 @@ size_t StackMapStream::PrepareForFillIn() { // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned. needed_size_ = CodeInfo::kFixedSize - + dex_register_location_catalog_size_ + stack_maps_size_ + + dex_register_location_catalog_size_ + dex_register_maps_size_ + inline_info_size_; - dex_register_location_catalog_start_ = CodeInfo::kFixedSize; - stack_maps_start_ = dex_register_location_catalog_start_ + dex_register_location_catalog_size_; - dex_register_maps_start_ = stack_maps_start_ + stack_maps_size_; + stack_maps_start_ = CodeInfo::kFixedSize; + // TODO: Move the catalog at the end. It is currently too expensive at runtime + // to compute its size (note that we do not encode that size in the CodeInfo). + dex_register_location_catalog_start_ = stack_maps_start_ + stack_maps_size_; + dex_register_maps_start_ = + dex_register_location_catalog_start_ + dex_register_location_catalog_size_; inline_infos_start_ = dex_register_maps_start_ + dex_register_maps_size_; return needed_size_; diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index e2c2e27b85..9179965a9d 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -33,6 +33,16 @@ namespace arm { class Arm32Assembler; class Thumb2Assembler; +// This class indicates that the label and its uses +// will fall into a range that is encodable in 16bits on thumb2. +class NearLabel : public Label { + public: + NearLabel() {} + + private: + DISALLOW_COPY_AND_ASSIGN(NearLabel); +}; + class ShifterOperand { public: ShifterOperand() : type_(kUnknown), rm_(kNoRegister), rs_(kNoRegister), @@ -519,6 +529,9 @@ class ArmAssembler : public Assembler { // Branch instructions. 
virtual void b(Label* label, Condition cond = AL) = 0; + virtual void b(NearLabel* label, Condition cond = AL) { + b(static_cast<Label*>(label), cond); + } virtual void bl(Label* label, Condition cond = AL) = 0; virtual void blx(Register rm, Condition cond = AL) = 0; virtual void bx(Register rm, Condition cond = AL) = 0; @@ -654,6 +667,9 @@ class ArmAssembler : public Assembler { virtual void Bind(Label* label) = 0; virtual void CompareAndBranchIfZero(Register r, Label* label) = 0; + virtual void CompareAndBranchIfZero(Register r, NearLabel* label) { + CompareAndBranchIfZero(r, static_cast<Label*>(label)); + } virtual void CompareAndBranchIfNonZero(Register r, Label* label) = 0; // diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 82fed509d7..3164623fd9 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -200,8 +200,8 @@ class Arm32Assembler FINAL : public ArmAssembler { void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE; // Branch instructions. - void b(Label* label, Condition cond = AL); - void bl(Label* label, Condition cond = AL); + void b(Label* label, Condition cond = AL) OVERRIDE; + void bl(Label* label, Condition cond = AL) OVERRIDE; void blx(Register rm, Condition cond = AL) OVERRIDE; void bx(Register rm, Condition cond = AL) OVERRIDE; void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false, diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index ab83f95084..26cb6c3739 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -671,11 +671,17 @@ void Thumb2Assembler::vcmpdz(DRegister dd, Condition cond) { EmitVFPddd(cond, B23 | B21 | B20 | B18 | B16 | B6, dd, D0, D0); } + void Thumb2Assembler::b(Label* label, Condition cond) { EmitBranch(cond, label, false, false); } +void Thumb2Assembler::b(NearLabel* label, Condition cond) { + EmitBranch(cond, label, false, false, /* is_near */ true); +} + + void Thumb2Assembler::bl(Label* label, Condition cond) { CheckCondition(cond); EmitBranch(cond, label, true, false); @@ -1369,6 +1375,7 @@ void Thumb2Assembler::Branch::Emit(AssemblerBuffer* buffer) const { uint16_t Thumb2Assembler::EmitCompareAndBranch(Register rn, uint16_t prev, bool n) { + CHECK(IsLowRegister(rn)); uint32_t location = buffer_.Size(); // This is always unresolved as it must be a forward branch. @@ -1613,7 +1620,7 @@ void Thumb2Assembler::EmitMultiMemOp(Condition cond, } -void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x) { +void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x, bool is_near) { uint32_t pc = buffer_.Size(); Branch::Type branch_type; if (cond == AL) { @@ -1644,8 +1651,8 @@ void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x } } else { // Branch is to an unbound label. Emit space for it. - uint16_t branch_id = AddBranch(branch_type, pc, cond); // Unresolved branch. - if (!CanRelocateBranches() || force_32bit_) { + uint16_t branch_id = AddBranch(branch_type, pc, cond, is_near); // Unresolved branch. + if (force_32bit_ || (!CanRelocateBranches() && !is_near)) { Emit16(static_cast<uint16_t>(label->position_)); // Emit current label link. Emit16(0); // another 16 bits. 
} else { @@ -2199,6 +2206,9 @@ void Thumb2Assembler::cbz(Register rn, Label* label) { if (label->IsBound()) { LOG(FATAL) << "cbz can only be used to branch forwards"; UNREACHABLE(); + } else if (IsHighRegister(rn)) { + LOG(FATAL) << "cbz can only be used with low registers"; + UNREACHABLE(); } else { uint16_t branchid = EmitCompareAndBranch(rn, static_cast<uint16_t>(label->position_), false); label->LinkTo(branchid); @@ -2211,6 +2221,9 @@ void Thumb2Assembler::cbnz(Register rn, Label* label) { if (label->IsBound()) { LOG(FATAL) << "cbnz can only be used to branch forwards"; UNREACHABLE(); + } else if (IsHighRegister(rn)) { + LOG(FATAL) << "cbnz can only be used with low registers"; + UNREACHABLE(); } else { uint16_t branchid = EmitCompareAndBranch(rn, static_cast<uint16_t>(label->position_), true); label->LinkTo(branchid); @@ -2741,7 +2754,17 @@ void Thumb2Assembler::dmb(DmbOptions flavor) { void Thumb2Assembler::CompareAndBranchIfZero(Register r, Label* label) { - if (CanRelocateBranches()) { + if (CanRelocateBranches() && IsLowRegister(r)) { + cbz(r, label); + } else { + cmp(r, ShifterOperand(0)); + b(label, EQ); + } +} + + +void Thumb2Assembler::CompareAndBranchIfZero(Register r, NearLabel* label) { + if (IsLowRegister(r)) { cbz(r, label); } else { cmp(r, ShifterOperand(0)); @@ -2751,7 +2774,7 @@ void Thumb2Assembler::CompareAndBranchIfZero(Register r, Label* label) { void Thumb2Assembler::CompareAndBranchIfNonZero(Register r, Label* label) { - if (CanRelocateBranches()) { + if (CanRelocateBranches() && IsLowRegister(r)) { cbnz(r, label); } else { cmp(r, ShifterOperand(0)); diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 2da8ee27f5..2382b74c30 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -238,6 +238,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Branch instructions. void b(Label* label, Condition cond = AL); + void b(NearLabel* label, Condition cond = AL); void bl(Label* label, Condition cond = AL); void blx(Label* label); void blx(Register rm, Condition cond = AL) OVERRIDE; @@ -272,6 +273,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE; void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE; + void CompareAndBranchIfZero(Register r, NearLabel* label) OVERRIDE; void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE; // Memory barriers. @@ -430,7 +432,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond); - void EmitBranch(Condition cond, Label* label, bool link, bool x); + void EmitBranch(Condition cond, Label* label, bool link, bool x, bool is_near = false); static int32_t EncodeBranchOffset(int32_t offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); int32_t EncodeTstOffset(int offset, int32_t inst); @@ -558,6 +560,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Resolve a branch when the target is known. If this causes the // size of the branch to change return true. Otherwise return false. bool Resolve(uint32_t target) { + uint32_t old_target = target_; target_ = target; if (assembler_->CanRelocateBranches()) { Size new_size = CalculateSize(); @@ -568,9 +571,12 @@ class Thumb2Assembler FINAL : public ArmAssembler { return false; } else { if (kIsDebugBuild) { - Size new_size = CalculateSize(); - // Check that the size has not increased. 
- DCHECK(!(new_size == k32Bit && size_ == k16Bit)); + if (old_target == kUnresolved) { + // Check that the size has not increased. + DCHECK(!(CalculateSize() == k32Bit && size_ == k16Bit)); + } else { + DCHECK(CalculateSize() == size_); + } } return false; } @@ -650,6 +656,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { if (assembler_->IsForced32Bit() && (type_ == kUnconditional || type_ == kConditional)) { return k32Bit; } + if (IsCompareAndBranch()) { + // Compare and branch instructions can only be encoded on 16 bits. + return k16Bit; + } return assembler_->CanRelocateBranches() ? k16Bit : k32Bit; } // When the target is resolved, we know the best encoding for it. @@ -713,8 +723,15 @@ class Thumb2Assembler FINAL : public ArmAssembler { } // Add an unresolved branch and return its id. - uint16_t AddBranch(Branch::Type type, uint32_t location, Condition cond = AL) { - branches_.push_back(new Branch(this, type, location, cond)); + uint16_t AddBranch(Branch::Type type, + uint32_t location, + Condition cond = AL, + bool is_near = false) { + Branch* branch = new Branch(this, type, location, cond); + if (is_near) { + branch->ResetSize(Branch::k16Bit); + } + branches_.push_back(branch); return branches_.size() - 1; } diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 3ee79a103f..9cc0c914a0 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -51,11 +51,11 @@ void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) { } void Arm64Assembler::GetCurrentThread(ManagedRegister tr) { - ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(ETR)); + ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR)); } void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) { - StoreToOffset(ETR, SP, offset.Int32Value()); + StoreToOffset(TR, SP, offset.Int32Value()); } // See Arm64 PCS Section 5.2.2.1. 
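Since the thread register is now x19, which AAPCS64 already treats as callee-saved, thread-local storage can be addressed through TR directly; the old shuffle of copying TR into the callee-saved ETR (x21) around native calls disappears in the hunks below. A hedged sketch of the resulting access pattern, reusing only helpers that appear in this file (Arm64Assembler member context and ART headers assumed; the function name is illustrative):

  // Inside Arm64Assembler: spill a scratch X register to a Thread field and read it back.
  void StoreAndReloadThreadWord(XRegister scratch, ThreadOffset<8> offs) {
    StoreToOffset(scratch, TR, offs.Int32Value());   // no Mov(ETR, TR) needed any more
    LoadFromOffset(scratch, TR, offs.Int32Value());
  }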
@@ -167,7 +167,7 @@ void Arm64Assembler::StoreImmediateToThread64(ThreadOffset<8> offs, uint32_t imm Arm64ManagedRegister scratch = m_scratch.AsArm64(); CHECK(scratch.IsXRegister()) << scratch; LoadImmediate(scratch.AsXRegister(), imm); - StoreToOffset(scratch.AsXRegister(), ETR, offs.Int32Value()); + StoreToOffset(scratch.AsXRegister(), TR, offs.Int32Value()); } void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> tr_offs, @@ -176,14 +176,14 @@ void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> tr_offs, Arm64ManagedRegister scratch = m_scratch.AsArm64(); CHECK(scratch.IsXRegister()) << scratch; AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value()); - StoreToOffset(scratch.AsXRegister(), ETR, tr_offs.Int32Value()); + StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); } void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset<8> tr_offs) { vixl::UseScratchRegisterScope temps(vixl_masm_); vixl::Register temp = temps.AcquireX(); ___ Mov(temp, reg_x(SP)); - ___ Str(temp, MEM_OP(reg_x(ETR), tr_offs.Int32Value())); + ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value())); } void Arm64Assembler::StoreSpanning(FrameOffset dest_off, ManagedRegister m_source, @@ -284,7 +284,7 @@ void Arm64Assembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) { } void Arm64Assembler::LoadFromThread64(ManagedRegister m_dst, ThreadOffset<8> src, size_t size) { - return Load(m_dst.AsArm64(), ETR, src.Int32Value(), size); + return Load(m_dst.AsArm64(), TR, src.Int32Value(), size); } void Arm64Assembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) { @@ -319,7 +319,7 @@ void Arm64Assembler::LoadRawPtr(ManagedRegister m_dst, ManagedRegister m_base, O void Arm64Assembler::LoadRawPtrFromThread64(ManagedRegister m_dst, ThreadOffset<8> offs) { Arm64ManagedRegister dst = m_dst.AsArm64(); CHECK(dst.IsXRegister()) << dst; - LoadFromOffset(dst.AsXRegister(), ETR, offs.Int32Value()); + LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value()); } // Copying routines. @@ -357,7 +357,7 @@ void Arm64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, ManagedRegister m_scratch) { Arm64ManagedRegister scratch = m_scratch.AsArm64(); CHECK(scratch.IsXRegister()) << scratch; - LoadFromOffset(scratch.AsXRegister(), ETR, tr_offs.Int32Value()); + LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value()); } @@ -367,7 +367,7 @@ void Arm64Assembler::CopyRawPtrToThread64(ThreadOffset<8> tr_offs, Arm64ManagedRegister scratch = m_scratch.AsArm64(); CHECK(scratch.IsXRegister()) << scratch; LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value()); - StoreToOffset(scratch.AsXRegister(), ETR, tr_offs.Int32Value()); + StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); } void Arm64Assembler::CopyRef(FrameOffset dest, FrameOffset src, @@ -610,7 +610,7 @@ void Arm64Assembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjus Arm64ManagedRegister scratch = m_scratch.AsArm64(); Arm64Exception *current_exception = new Arm64Exception(scratch, stack_adjust); exception_blocks_.push_back(current_exception); - LoadFromOffset(scratch.AsXRegister(), ETR, Thread::ExceptionOffset<8>().Int32Value()); + LoadFromOffset(scratch.AsXRegister(), TR, Thread::ExceptionOffset<8>().Int32Value()); ___ Cbnz(reg_x(scratch.AsXRegister()), current_exception->Entry()); } @@ -627,12 +627,7 @@ void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) { // Pass exception object as argument. 
// Don't care about preserving X0 as this won't return. ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister())); - ___ Ldr(temp, MEM_OP(reg_x(ETR), QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value())); - - // Move ETR(Callee saved) back to TR(Caller saved) reg. We use ETR on calls - // to external functions that might trash TR. We do not need the original - // ETR(X21) saved in BuildFrame(). - ___ Mov(reg_x(TR), reg_x(ETR)); + ___ Ldr(temp, MEM_OP(reg_x(TR), QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value())); ___ Blr(temp); // Call should never return. @@ -713,12 +708,7 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, SpillRegisters(core_reg_list, frame_size - core_reg_size); SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size); - // Note: This is specific to JNI method frame. - // We will need to move TR(Caller saved in AAPCS) to ETR(Callee saved in AAPCS). The original - // (ETR)X21 has been saved on stack. In this way, we can restore TR later. - DCHECK(!core_reg_list.IncludesAliasOf(reg_x(TR))); - DCHECK(core_reg_list.IncludesAliasOf(reg_x(ETR))); - ___ Mov(reg_x(ETR), reg_x(TR)); + DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR))); // Write StackReference<Method>. DCHECK(X0 == method_reg.AsArm64().AsXRegister()); @@ -771,11 +761,7 @@ void Arm64Assembler::RemoveFrame(size_t frame_size, DCHECK_GE(frame_size, core_reg_size + fp_reg_size + sizeof(StackReference<mirror::ArtMethod>)); DCHECK_ALIGNED(frame_size, kStackAlignment); - // Note: This is specific to JNI method frame. - // Restore TR(Caller saved in AAPCS) from ETR(Callee saved in AAPCS). - DCHECK(!core_reg_list.IncludesAliasOf(reg_x(TR))); - DCHECK(core_reg_list.IncludesAliasOf(reg_x(ETR))); - ___ Mov(reg_x(TR), reg_x(ETR)); + DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR))); cfi_.RememberState(); diff --git a/compiler/utils/arm64/managed_register_arm64_test.cc b/compiler/utils/arm64/managed_register_arm64_test.cc index 32c2e624c3..e27115d78a 100644 --- a/compiler/utils/arm64/managed_register_arm64_test.cc +++ b/compiler/utils/arm64/managed_register_arm64_test.cc @@ -623,7 +623,7 @@ TEST(Arm64ManagedRegister, VixlRegisters) { EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(X29))); EXPECT_TRUE(vixl::x30.Is(Arm64Assembler::reg_x(X30))); - EXPECT_TRUE(vixl::x18.Is(Arm64Assembler::reg_x(TR))); + EXPECT_TRUE(vixl::x19.Is(Arm64Assembler::reg_x(TR))); EXPECT_TRUE(vixl::ip0.Is(Arm64Assembler::reg_x(IP0))); EXPECT_TRUE(vixl::ip1.Is(Arm64Assembler::reg_x(IP1))); EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(FP))); diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h index 574051a657..c8b3fe58a8 100644 --- a/compiler/utils/assembler_test_base.h +++ b/compiler/utils/assembler_test_base.h @@ -216,9 +216,9 @@ class AssemblerTestInfrastructure { bool success = Exec(args, error_msg); if (!success) { - LOG(INFO) << "Assembler command line:"; + LOG(ERROR) << "Assembler command line:"; for (std::string arg : args) { - LOG(INFO) << arg; + LOG(ERROR) << arg; } } return success; diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 773862710d..1a2c9a9000 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -1338,6 +1338,24 @@ TEST(Thumb2AssemblerTest, LoadStoreLimits) { delete assembler; } +TEST(Thumb2AssemblerTest, CompareAndBranch) { + arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + + arm::NearLabel 
label; + __ CompareAndBranchIfZero(arm::R0, &label); + __ CompareAndBranchIfZero(arm::R11, &label); + __ CompareAndBranchIfNonZero(arm::R0, &label); + __ CompareAndBranchIfNonZero(arm::R11, &label); + __ Bind(&label); + + size_t cs = __ CodeSize(); + std::vector<uint8_t> managed_code(cs); + MemoryRegion code(&managed_code[0], managed_code.size()); + __ FinalizeInstructions(code); + dump(managed_code, "CompareAndBranch"); + delete assembler; +} + #undef __ } // namespace arm } // namespace art diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 3d03234e04..841d6a00c0 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -4822,6 +4822,16 @@ const char* LoadStoreLimitsResults[] = { " 30: f8a4 0040 strh.w r0, [r4, #64] ; 0x40\n", nullptr }; +const char* CompareAndBranchResults[] = { + " 0: b130 cbz r0, 10 <CompareAndBranch+0x10>\n", + " 2: f1bb 0f00 cmp.w fp, #0\n", + " 6: d003 beq.n 10 <CompareAndBranch+0x10>\n", + " 8: b910 cbnz r0, 10 <CompareAndBranch+0x10>\n", + " a: f1bb 0f00 cmp.w fp, #0\n", + " e: d1ff bne.n 10 <CompareAndBranch+0x10>\n", + nullptr +}; + std::map<std::string, const char**> test_results; void setup_results() { test_results["SimpleMov"] = SimpleMovResults; @@ -4869,4 +4879,5 @@ void setup_results() { test_results["LoadStoreRegOffset"] = LoadStoreRegOffsetResults; test_results["LoadStoreLiteral"] = LoadStoreLiteralResults; test_results["LoadStoreLimits"] = LoadStoreLimitsResults; + test_results["CompareAndBranch"] = CompareAndBranchResults; } |