| author | 2013-11-06 01:33:15 -0800 |
|---|---|
| committer | 2013-11-06 01:33:15 -0800 |
| commit | 8e16efc49eda5ea8c13da577cb2e9f625f659f1f (patch) |
| tree | 533cf9a77046a49525d916043a58949e59319f2d /compiler/dex/quick |
| parent | f50a0e17ae5443bed0f80d628f71e5effc2faf07 (diff) |
| parent | 7020278bce98a0735dc6abcbd33bdf1ed2634f1d (diff) |
Merge remote-tracking branch 'goog/dalvik-dev' into merge-art
Diffstat (limited to 'compiler/dex/quick')
30 files changed, 2395 insertions(+), 1796 deletions(-)
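The bulk of the diff that follows reworks how the ARM quick-compiler assembler handles pc-relative instructions: every `ArmEncodingMap` entry gains a `FixupKind` field (`kFixupNone`, `kFixupLoad`, `kFixupCondBranch`, ...), new `kThumbRev`/`kThumbRevsh` and `kThumb2SdivRRR`/`kThumb2UdivRRR` opcodes are added, and `AssembleLIR` now walks only a linked list of fixup candidates (`first_fixup_`, `u.a.pcrel_next`), widening any instruction whose displacement no longer fits (for example, turning a pc-relative load into a `kThumb2Adr` plus a register-offset load) and retrying until the layout is stable. The sketch below is a minimal, self-contained illustration of that widen-and-retry idea only; the `Insn` type, the recompute-all-offsets pass, and the `short_range` parameter are simplifications invented for this example and are not part of the ART sources.

```cpp
// Minimal sketch of assemble-with-fixups: instructions start in their short
// encoding; any pc-relative instruction whose displacement turns out to be
// out of range is promoted to the wide encoding and the layout is retried.
#include <vector>

enum FixupKind { kFixupNone, kFixupBranch };

struct Insn {
  FixupKind fixup = kFixupNone;  // analogous to ArmEncodingMap::fixup
  int size = 2;                  // bytes: 2 = short form, 4 = wide form
  int offset = 0;                // code offset, recomputed when earlier insns grow
  int target_index = -1;         // branch/load target (index into the insn vector)
};

// Repeat until no instruction had to be widened in a pass.
void Assemble(std::vector<Insn>& insns, int short_range) {
  bool changed = true;
  while (changed) {
    changed = false;
    int offset = 0;
    for (Insn& in : insns) {      // assign offsets for the current sizes
      in.offset = offset;
      offset += in.size;
    }
    for (Insn& in : insns) {
      if (in.fixup == kFixupNone || in.size == 4) {
        continue;                 // nothing to patch, or already wide
      }
      int delta = insns[in.target_index].offset - (in.offset + 4);
      if (delta > short_range || delta < -short_range) {
        in.size = 4;              // promote; following offsets shift on the next pass
        changed = true;
      }
    }
  }
}

int main() {
  std::vector<Insn> code(40);     // 40 two-byte instructions
  code[0].fixup = kFixupBranch;
  code[0].target_index = 39;      // a branch that has to reach far forward
  Assemble(code, /*short_range=*/64);
  // code[0].size is now 4: a 2-byte form cannot span ~78 bytes here.
  return 0;
}
```

The real code avoids recomputing every offset on each pass: it carries an `offset_adjustment` along the fixup list and tags visited nodes with a `generation` bit, as can be seen in `AssembleLIR` further down in the diff.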
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h index 2f54190ae7..ffaaf84503 100644 --- a/compiler/dex/quick/arm/arm_lir.h +++ b/compiler/dex/quick/arm/arm_lir.h @@ -296,6 +296,8 @@ enum ArmOpcode { kThumbOrr, // orr [0100001100] rm[5..3] rd[2..0]. kThumbPop, // pop [1011110] r[8..8] rl[7..0]. kThumbPush, // push [1011010] r[8..8] rl[7..0]. + kThumbRev, // rev [1011101000] rm[5..3] rd[2..0] + kThumbRevsh, // revsh [1011101011] rm[5..3] rd[2..0] kThumbRorRR, // ror [0100000111] rs[5..3] rd[2..0]. kThumbSbc, // sbc [0100000110] rm[5..3] rd[2..0]. kThumbStmia, // stmia [11000] rn[10..8] reglist [7.. 0]. @@ -378,6 +380,8 @@ enum ArmOpcode { kThumb2CmnRR, // cmn [111010110001] rn[19..16] [0000] [1111] [0000] rm[3..0]. kThumb2EorRRR, // eor [111010101000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. kThumb2MulRRR, // mul [111110110000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. + kThumb2SdivRRR, // sdiv [111110111001] rn[19..16] [1111] rd[11..8] [1111] rm[3..0]. + kThumb2UdivRRR, // udiv [111110111011] rn[19..16] [1111] rd[11..8] [1111] rm[3..0]. kThumb2MnvRR, // mvn [11101010011011110] rd[11-8] [0000] rm[3..0]. kThumb2RsubRRI8, // rsub [111100011100] rn[19..16] [0000] rd[11..8] imm8[7..0]. kThumb2NegRR, // actually rsub rd, rn, #0. @@ -399,6 +403,8 @@ enum ArmOpcode { kThumb2AdcRRI8, // adc [111100010101] rn[19..16] [0] imm3 rd[11..8] imm8. kThumb2SubRRI8, // sub [111100011011] rn[19..16] [0] imm3 rd[11..8] imm8. kThumb2SbcRRI8, // sbc [111100010111] rn[19..16] [0] imm3 rd[11..8] imm8. + kThumb2RevRR, // rev [111110101001] rm[19..16] [1111] rd[11..8] 1000 rm[3..0] + kThumb2RevshRR, // rev [111110101001] rm[19..16] [1111] rd[11..8] 1011 rm[3..0] kThumb2It, // it [10111111] firstcond[7-4] mask[3-0]. kThumb2Fmstat, // fmstat [11101110111100011111101000010000]. kThumb2Vcmpd, // vcmp [111011101] D [11011] rd[15-12] [1011] E [1] M [0] rm[3-0]. @@ -462,7 +468,7 @@ enum ArmOpDmbOptions { // Instruction assembly field_loc kind. enum ArmEncodingKind { - kFmtUnused, + kFmtUnused, // Unused field and marks end of formats. kFmtBitBlt, // Bit string using end/start. kFmtDfp, // Double FP reg. kFmtSfp, // Single FP reg. @@ -477,6 +483,7 @@ enum ArmEncodingKind { kFmtBrOffset, // Signed extended [26,11,13,21-16,10-0]:0. kFmtFPImm, // Encoded floating point immediate. kFmtOff24, // 24-bit Thumb2 unconditional branch encoding. + kFmtSkip, // Unused field, but continue to next. }; // Struct used to define the snippet positions for each Thumb opcode. @@ -492,6 +499,7 @@ struct ArmEncodingMap { const char* name; const char* fmt; int size; // Note: size is in bytes. + FixupKind fixup; }; } // namespace art diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index 0649c9f319..3d0f263fad 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -37,9 +37,9 @@ namespace art { * fmt: for pretty-printing */ #define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \ - k3, k3s, k3e, flags, name, fmt, size) \ + k3, k3s, k3e, flags, name, fmt, size, fixup) \ {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \ - {k3, k3s, k3e}}, opcode, flags, name, fmt, size} + {k3, k3s, k3e}}, opcode, flags, name, fmt, size, fixup} /* Instruction dump string format keys: !pf, where "!" 
is the start * of the key, "p" is which numeric operand to use and "f" is the @@ -79,916 +79,966 @@ namespace art { const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = { ENCODING_MAP(kArm16BitData, 0x0000, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2), + kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2, kFixupNone), ENCODING_MAP(kThumbAdcRR, 0x4140, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES | USES_CCODES, - "adcs", "!0C, !1C", 2), + "adcs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbAddRRI3, 0x1c00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "adds", "!0C, !1C, #!2d", 2), + "adds", "!0C, !1C, #!2d", 2, kFixupNone), ENCODING_MAP(kThumbAddRI8, 0x3000, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, - "adds", "!0C, !0C, #!1d", 2), + "adds", "!0C, !0C, #!1d", 2, kFixupNone), ENCODING_MAP(kThumbAddRRR, 0x1800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES, - "adds", "!0C, !1C, !2C", 2), + "adds", "!0C, !1C, !2C", 2, kFixupNone), ENCODING_MAP(kThumbAddRRLH, 0x4440, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, - "add", "!0C, !1C", 2), + "add", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbAddRRHL, 0x4480, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, - "add", "!0C, !1C", 2), + "add", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbAddRRHH, 0x44c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, - "add", "!0C, !1C", 2), + "add", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbAddPcRel, 0xa000, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | IS_BRANCH | NEEDS_FIXUP, - "add", "!0C, pc, #!1E", 2), + "add", "!0C, pc, #!1E", 2, kFixupLoad), ENCODING_MAP(kThumbAddSpRel, 0xa800, - kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0, + kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF_SP | REG_USE_SP, - "add", "!0C, sp, #!2E", 2), + "add", "!0C, sp, #!2E", 2, kFixupNone), ENCODING_MAP(kThumbAddSpI7, 0xb000, kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP, - "add", "sp, #!0d*4", 2), + "add", "sp, #!0d*4", 2, kFixupNone), ENCODING_MAP(kThumbAndRR, 0x4000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "ands", "!0C, !1C", 2), + "ands", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbAsrRRI5, 0x1000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "asrs", "!0C, !1C, #!2d", 2), + "asrs", "!0C, !1C, #!2d", 2, kFixupNone), ENCODING_MAP(kThumbAsrRR, 0x4100, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "asrs", "!0C, !1C", 2), + "asrs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbBCond, 0xd000, kFmtBitBlt, 7, 0, kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES | - NEEDS_FIXUP, "b!1c", "!0t", 2), + NEEDS_FIXUP, 
"b!1c", "!0t", 2, kFixupCondBranch), ENCODING_MAP(kThumbBUncond, 0xe000, kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, - "b", "!0t", 2), + "b", "!0t", 2, kFixupT1Branch), ENCODING_MAP(kThumbBicRR, 0x4380, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "bics", "!0C, !1C", 2), + "bics", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbBkpt, 0xbe00, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, - "bkpt", "!0d", 2), + "bkpt", "!0d", 2, kFixupNone), ENCODING_MAP(kThumbBlx1, 0xf000, kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR | - NEEDS_FIXUP, "blx_1", "!0u", 2), + NEEDS_FIXUP, "blx_1", "!0u", 2, kFixupBlx1), ENCODING_MAP(kThumbBlx2, 0xe800, kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR | - NEEDS_FIXUP, "blx_2", "!0v", 2), + NEEDS_FIXUP, "blx_2", "!0v", 2, kFixupLabel), ENCODING_MAP(kThumbBl1, 0xf000, kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR, - "bl_1", "!0u", 2), + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP, + "bl_1", "!0u", 2, kFixupBl1), ENCODING_MAP(kThumbBl2, 0xf800, kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR, - "bl_2", "!0v", 2), + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP, + "bl_2", "!0v", 2, kFixupLabel), ENCODING_MAP(kThumbBlxR, 0x4780, kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR, - "blx", "!0C", 2), + "blx", "!0C", 2, kFixupNone), ENCODING_MAP(kThumbBx, 0x4700, kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, - "bx", "!0C", 2), + "bx", "!0C", 2, kFixupNone), ENCODING_MAP(kThumbCmnRR, 0x42c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmn", "!0C, !1C", 2), + "cmn", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbCmpRI8, 0x2800, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES, - "cmp", "!0C, #!1d", 2), + "cmp", "!0C, #!1d", 2, kFixupNone), ENCODING_MAP(kThumbCmpRR, 0x4280, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 2), + "cmp", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbCmpLH, 0x4540, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 2), + "cmp", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbCmpHL, 0x4580, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 2), + "cmp", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbCmpHH, 0x45c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 2), + "cmp", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbEorRR, 0x4040, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "eors", "!0C, !1C", 2), + "eors", "!0C, !1C", 
2, kFixupNone), ENCODING_MAP(kThumbLdmia, 0xc800, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, - "ldmia", "!0C!!, <!1R>", 2), + "ldmia", "!0C!!, <!1R>", 2, kFixupNone), ENCODING_MAP(kThumbLdrRRI5, 0x6800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldr", "!0C, [!1C, #!2E]", 2), + "ldr", "!0C, [!1C, #!2E]", 2, kFixupNone), ENCODING_MAP(kThumbLdrRRR, 0x5800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldr", "!0C, [!1C, !2C]", 2), + "ldr", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbLdrPcRel, 0x4800, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC - | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2), + | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2, kFixupLoad), ENCODING_MAP(kThumbLdrSpRel, 0x9800, - kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0, + kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP - | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2), + | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2, kFixupNone), ENCODING_MAP(kThumbLdrbRRI5, 0x7800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrb", "!0C, [!1C, #2d]", 2), + "ldrb", "!0C, [!1C, #2d]", 2, kFixupNone), ENCODING_MAP(kThumbLdrbRRR, 0x5c00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrb", "!0C, [!1C, !2C]", 2), + "ldrb", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbLdrhRRI5, 0x8800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrh", "!0C, [!1C, #!2F]", 2), + "ldrh", "!0C, [!1C, #!2F]", 2, kFixupNone), ENCODING_MAP(kThumbLdrhRRR, 0x5a00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrh", "!0C, [!1C, !2C]", 2), + "ldrh", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbLdrsbRRR, 0x5600, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrsb", "!0C, [!1C, !2C]", 2), + "ldrsb", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbLdrshRRR, 0x5e00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrsh", "!0C, [!1C, !2C]", 2), + "ldrsh", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbLslRRI5, 0x0000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "lsls", "!0C, !1C, #!2d", 2), + "lsls", "!0C, !1C, #!2d", 2, kFixupNone), ENCODING_MAP(kThumbLslRR, 0x4080, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "lsls", "!0C, !1C", 2), + "lsls", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbLsrRRI5, 0x0800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "lsrs", "!0C, !1C, #!2d", 2), + "lsrs", "!0C, !1C, #!2d", 2, kFixupNone), ENCODING_MAP(kThumbLsrRR, 0x40c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "lsrs", 
"!0C, !1C", 2), + "lsrs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbMovImm, 0x2000, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | SETS_CCODES, - "movs", "!0C, #!1d", 2), + "movs", "!0C, #!1d", 2, kFixupNone), ENCODING_MAP(kThumbMovRR, 0x1c00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "movs", "!0C, !1C", 2), + "movs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbMovRR_H2H, 0x46c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "!0C, !1C", 2), + "mov", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbMovRR_H2L, 0x4640, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "!0C, !1C", 2), + "mov", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbMovRR_L2H, 0x4680, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "!0C, !1C", 2), + "mov", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbMul, 0x4340, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "muls", "!0C, !1C", 2), + "muls", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbMvn, 0x43c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "mvns", "!0C, !1C", 2), + "mvns", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbNeg, 0x4240, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "negs", "!0C, !1C", 2), + "negs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbOrr, 0x4300, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "orrs", "!0C, !1C", 2), + "orrs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbPop, 0xbc00, kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 - | IS_LOAD, "pop", "<!0R>", 2), + | IS_LOAD, "pop", "<!0R>", 2, kFixupNone), ENCODING_MAP(kThumbPush, 0xb400, kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 - | IS_STORE, "push", "<!0R>", 2), + | IS_STORE, "push", "<!0R>", 2, kFixupNone), + ENCODING_MAP(kThumbRev, 0xba00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1, + "rev", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbRevsh, 0xbac0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1, + "rev", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbRorRR, 0x41c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "rors", "!0C, !1C", 2), + "rors", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbSbc, 0x4180, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | USES_CCODES | SETS_CCODES, - "sbcs", "!0C, !1C", 2), + "sbcs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbStmia, 0xc000, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1 | IS_STORE, - "stmia", "!0C!!, <!1R>", 2), + "stmia", "!0C!!, <!1R>", 2, kFixupNone), 
ENCODING_MAP(kThumbStrRRI5, 0x6000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "str", "!0C, [!1C, #!2E]", 2), + "str", "!0C, [!1C, #!2E]", 2, kFixupNone), ENCODING_MAP(kThumbStrRRR, 0x5000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, - "str", "!0C, [!1C, !2C]", 2), + "str", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbStrSpRel, 0x9000, - kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0, + kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP - | IS_STORE, "str", "!0C, [sp, #!2E]", 2), + | IS_STORE, "str", "!0C, [sp, #!2E]", 2, kFixupNone), ENCODING_MAP(kThumbStrbRRI5, 0x7000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "strb", "!0C, [!1C, #!2d]", 2), + "strb", "!0C, [!1C, #!2d]", 2, kFixupNone), ENCODING_MAP(kThumbStrbRRR, 0x5400, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, - "strb", "!0C, [!1C, !2C]", 2), + "strb", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbStrhRRI5, 0x8000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "strh", "!0C, [!1C, #!2F]", 2), + "strh", "!0C, [!1C, #!2F]", 2, kFixupNone), ENCODING_MAP(kThumbStrhRRR, 0x5200, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, - "strh", "!0C, [!1C, !2C]", 2), + "strh", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbSubRRI3, 0x1e00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "subs", "!0C, !1C, #!2d", 2), + "subs", "!0C, !1C, #!2d", 2, kFixupNone), ENCODING_MAP(kThumbSubRI8, 0x3800, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, - "subs", "!0C, #!1d", 2), + "subs", "!0C, #!1d", 2, kFixupNone), ENCODING_MAP(kThumbSubRRR, 0x1a00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES, - "subs", "!0C, !1C, !2C", 2), + "subs", "!0C, !1C, !2C", 2, kFixupNone), ENCODING_MAP(kThumbSubSpI7, 0xb080, kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP, - "sub", "sp, #!0d*4", 2), + "sub", "sp, #!0d*4", 2, kFixupNone), ENCODING_MAP(kThumbSwi, 0xdf00, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, - "swi", "!0d", 2), + "swi", "!0d", 2, kFixupNone), ENCODING_MAP(kThumbTst, 0x4200, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE01 | SETS_CCODES, - "tst", "!0C, !1C", 2), + "tst", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumb2Vldrs, 0xed900a00, kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD | - REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4), + REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4, kFixupVLoad), ENCODING_MAP(kThumb2Vldrd, 0xed900b00, kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD | - REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4), + REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4, 
kFixupVLoad), ENCODING_MAP(kThumb2Vmuls, 0xee200a00, kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vmuls", "!0s, !1s, !2s", 4), + "vmuls", "!0s, !1s, !2s", 4, kFixupNone), ENCODING_MAP(kThumb2Vmuld, 0xee200b00, kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vmuld", "!0S, !1S, !2S", 4), + "vmuld", "!0S, !1S, !2S", 4, kFixupNone), ENCODING_MAP(kThumb2Vstrs, 0xed800a00, kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "vstr", "!0s, [!1C, #!2E]", 4), + "vstr", "!0s, [!1C, #!2E]", 4, kFixupNone), ENCODING_MAP(kThumb2Vstrd, 0xed800b00, kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "vstr", "!0S, [!1C, #!2E]", 4), + "vstr", "!0S, [!1C, #!2E]", 4, kFixupNone), ENCODING_MAP(kThumb2Vsubs, 0xee300a40, kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vsub", "!0s, !1s, !2s", 4), + "vsub", "!0s, !1s, !2s", 4, kFixupNone), ENCODING_MAP(kThumb2Vsubd, 0xee300b40, kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vsub", "!0S, !1S, !2S", 4), + "vsub", "!0S, !1S, !2S", 4, kFixupNone), ENCODING_MAP(kThumb2Vadds, 0xee300a00, kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vadd", "!0s, !1s, !2s", 4), + "vadd", "!0s, !1s, !2s", 4, kFixupNone), ENCODING_MAP(kThumb2Vaddd, 0xee300b00, kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vadd", "!0S, !1S, !2S", 4), + "vadd", "!0S, !1S, !2S", 4, kFixupNone), ENCODING_MAP(kThumb2Vdivs, 0xee800a00, kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vdivs", "!0s, !1s, !2s", 4), + "vdivs", "!0s, !1s, !2s", 4, kFixupNone), ENCODING_MAP(kThumb2Vdivd, 0xee800b00, kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vdivd", "!0S, !1S, !2S", 4), + "vdivd", "!0S, !1S, !2S", 4, kFixupNone), ENCODING_MAP(kThumb2VcvtIF, 0xeeb80ac0, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.f32", "!0s, !1s", 4), + "vcvt.f32", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2VcvtID, 0xeeb80bc0, kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.f64", "!0S, !1s", 4), + "vcvt.f64", "!0S, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2VcvtFI, 0xeebd0ac0, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.s32.f32 ", "!0s, !1s", 4), + "vcvt.s32.f32 ", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2VcvtDI, 0xeebd0bc0, kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.s32.f64 ", "!0s, !1S", 4), + "vcvt.s32.f64 ", "!0s, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2VcvtFd, 0xeeb70ac0, kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.f64.f32 ", "!0S, !1s", 4), + "vcvt.f64.f32 ", "!0S, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2VcvtDF, 0xeeb70bc0, kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.f32.f64 ", "!0s, !1S", 4), + "vcvt.f32.f64 ", "!0s, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2Vsqrts, 
0xeeb10ac0, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vsqrt.f32 ", "!0s, !1s", 4), + "vsqrt.f32 ", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2Vsqrtd, 0xeeb10bc0, kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vsqrt.f64 ", "!0S, !1S", 4), + "vsqrt.f64 ", "!0S, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2MovImmShift, 0xf04f0000, /* no setflags encoding */ kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "mov", "!0C, #!1m", 4), + "mov", "!0C, #!1m", 4, kFixupNone), ENCODING_MAP(kThumb2MovImm16, 0xf2400000, kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "mov", "!0C, #!1M", 4), + "mov", "!0C, #!1M", 4, kFixupNone), ENCODING_MAP(kThumb2StrRRI12, 0xf8c00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "str", "!0C, [!1C, #!2d]", 4), + "str", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrRRI12, 0xf8d00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldr", "!0C, [!1C, #!2d]", 4), + "ldr", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2StrRRI8Predec, 0xf8400c00, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "str", "!0C, [!1C, #-!2d]", 4), + "str", "!0C, [!1C, #-!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrRRI8Predec, 0xf8500c00, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldr", "!0C, [!1C, #-!2d]", 4), + "ldr", "!0C, [!1C, #-!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2Cbnz, 0xb900, /* Note: does not affect flags */ kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH | - NEEDS_FIXUP, "cbnz", "!0C,!1t", 2), + NEEDS_FIXUP, "cbnz", "!0C,!1t", 2, kFixupCBxZ), ENCODING_MAP(kThumb2Cbz, 0xb100, /* Note: does not affect flags */ kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH | - NEEDS_FIXUP, "cbz", "!0C,!1t", 2), + NEEDS_FIXUP, "cbz", "!0C,!1t", 2, kFixupCBxZ), ENCODING_MAP(kThumb2AddRRI12, 0xf2000000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */ - "add", "!0C,!1C,#!2d", 4), + "add", "!0C,!1C,#!2d", 4, kFixupNone), ENCODING_MAP(kThumb2MovRR, 0xea4f0000, /* no setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "!0C, !1C", 4), + "mov", "!0C, !1C", 4, kFixupNone), ENCODING_MAP(kThumb2Vmovs, 0xeeb00a40, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vmov.f32 ", " !0s, !1s", 4), + "vmov.f32 ", " !0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2Vmovd, 0xeeb00b40, kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vmov.f64 ", " !0S, !1S", 4), + "vmov.f64 ", " !0S, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2Ldmia, 0xe8900000, kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, - "ldmia", "!0C!!, <!1R>", 4), + "ldmia", "!0C!!, <!1R>", 4, kFixupNone), 
ENCODING_MAP(kThumb2Stmia, 0xe8800000, kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1 | IS_STORE, - "stmia", "!0C!!, <!1R>", 4), + "stmia", "!0C!!, <!1R>", 4, kFixupNone), ENCODING_MAP(kThumb2AddRRR, 0xeb100000, /* setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "adds", "!0C, !1C, !2C!3H", 4), + "adds", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2SubRRR, 0xebb00000, /* setflags enconding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "subs", "!0C, !1C, !2C!3H", 4), + "subs", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2SbcRRR, 0xeb700000, /* setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES | SETS_CCODES, - "sbcs", "!0C, !1C, !2C!3H", 4), + "sbcs", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2CmpRR, 0xebb00f00, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 4), + "cmp", "!0C, !1C", 4, kFixupNone), ENCODING_MAP(kThumb2SubRRI12, 0xf2a00000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */ - "sub", "!0C,!1C,#!2d", 4), + "sub", "!0C,!1C,#!2d", 4, kFixupNone), ENCODING_MAP(kThumb2MvnImm12, 0xf06f0000, /* no setflags encoding */ kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "mvn", "!0C, #!1n", 4), + "mvn", "!0C, #!1n", 4, kFixupNone), ENCODING_MAP(kThumb2Sel, 0xfaa0f080, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES, - "sel", "!0C, !1C, !2C", 4), + "sel", "!0C, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2Ubfx, 0xf3c00000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1, kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, - "ubfx", "!0C, !1C, #!2d, #!3d", 4), + "ubfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone), ENCODING_MAP(kThumb2Sbfx, 0xf3400000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1, kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, - "sbfx", "!0C, !1C, #!2d, #!3d", 4), + "sbfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone), ENCODING_MAP(kThumb2LdrRRR, 0xf8500000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4), + "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrhRRR, 0xf8300000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4), + "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrshRRR, 0xf9300000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4), + "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrbRRR, 0xf8100000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4), + "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrsbRRR, 0xf9100000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, 
kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4), + "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2StrRRR, 0xf8400000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, - "str", "!0C, [!1C, !2C, LSL #!3d]", 4), + "str", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2StrhRRR, 0xf8200000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, - "strh", "!0C, [!1C, !2C, LSL #!3d]", 4), + "strh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2StrbRRR, 0xf8000000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, - "strb", "!0C, [!1C, !2C, LSL #!3d]", 4), + "strb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrhRRI12, 0xf8b00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrh", "!0C, [!1C, #!2d]", 4), + "ldrh", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrshRRI12, 0xf9b00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrsh", "!0C, [!1C, #!2d]", 4), + "ldrsh", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrbRRI12, 0xf8900000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrb", "!0C, [!1C, #!2d]", 4), + "ldrb", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrsbRRI12, 0xf9900000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrsb", "!0C, [!1C, #!2d]", 4), + "ldrsb", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2StrhRRI12, 0xf8a00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "strh", "!0C, [!1C, #!2d]", 4), + "strh", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2StrbRRI12, 0xf8800000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "strb", "!0C, [!1C, #!2d]", 4), + "strb", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2Pop, 0xe8bd0000, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 - | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4), + | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4, kFixupPushPop), ENCODING_MAP(kThumb2Push, 0xe92d0000, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 - | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4), + | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop), ENCODING_MAP(kThumb2CmpRI12, 0xf1b00f00, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES, - "cmp", "!0C, #!1m", 4), + "cmp", "!0C, #!1m", 4, kFixupNone), ENCODING_MAP(kThumb2AdcRRR, 0xeb500000, /* setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "adcs", "!0C, !1C, !2C!3H", 4), + "adcs", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2AndRRR, 0xea000000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, 
IS_QUAD_OP | REG_DEF0_USE12, - "and", "!0C, !1C, !2C!3H", 4), + "and", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2BicRRR, 0xea200000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, - "bic", "!0C, !1C, !2C!3H", 4), + "bic", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2CmnRR, 0xeb000000, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "cmn", "!0C, !1C, shift !2d", 4), + "cmn", "!0C, !1C, shift !2d", 4, kFixupNone), ENCODING_MAP(kThumb2EorRRR, 0xea800000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, - "eor", "!0C, !1C, !2C!3H", 4), + "eor", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2MulRRR, 0xfb00f000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "mul", "!0C, !1C, !2C", 4), + "mul", "!0C, !1C, !2C", 4, kFixupNone), + ENCODING_MAP(kThumb2SdivRRR, 0xfb90f0f0, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "sdiv", "!0C, !1C, !2C", 4, kFixupNone), + ENCODING_MAP(kThumb2UdivRRR, 0xfbb0f0f0, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "udiv", "!0C, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2MnvRR, 0xea6f0000, kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "mvn", "!0C, !1C, shift !2d", 4), + "mvn", "!0C, !1C, shift !2d", 4, kFixupNone), ENCODING_MAP(kThumb2RsubRRI8, 0xf1d00000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "rsb", "!0C,!1C,#!2m", 4), + "rsb", "!0C,!1C,#!2m", 4, kFixupNone), ENCODING_MAP(kThumb2NegRR, 0xf1d00000, /* instance of rsub */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "neg", "!0C,!1C", 4), + "neg", "!0C,!1C", 4, kFixupNone), ENCODING_MAP(kThumb2OrrRRR, 0xea400000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, - "orr", "!0C, !1C, !2C!3H", 4), + "orr", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2TstRR, 0xea100f00, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, - "tst", "!0C, !1C, shift !2d", 4), + "tst", "!0C, !1C, shift !2d", 4, kFixupNone), ENCODING_MAP(kThumb2LslRRR, 0xfa00f000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "lsl", "!0C, !1C, !2C", 4), + "lsl", "!0C, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2LsrRRR, 0xfa20f000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "lsr", "!0C, !1C, !2C", 4), + "lsr", "!0C, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2AsrRRR, 0xfa40f000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "asr", "!0C, !1C, !2C", 4), + "asr", "!0C, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2RorRRR, 0xfa60f000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "ror", "!0C, !1C, !2C", 4), + "ror", "!0C, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2LslRRI5, 0xea4f0000, kFmtBitBlt, 11, 8, 
kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "lsl", "!0C, !1C, #!2d", 4), + "lsl", "!0C, !1C, #!2d", 4, kFixupNone), ENCODING_MAP(kThumb2LsrRRI5, 0xea4f0010, kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "lsr", "!0C, !1C, #!2d", 4), + "lsr", "!0C, !1C, #!2d", 4, kFixupNone), ENCODING_MAP(kThumb2AsrRRI5, 0xea4f0020, kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "asr", "!0C, !1C, #!2d", 4), + "asr", "!0C, !1C, #!2d", 4, kFixupNone), ENCODING_MAP(kThumb2RorRRI5, 0xea4f0030, kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "ror", "!0C, !1C, #!2d", 4), + "ror", "!0C, !1C, #!2d", 4, kFixupNone), ENCODING_MAP(kThumb2BicRRI8, 0xf0200000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "bic", "!0C, !1C, #!2m", 4), + "bic", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2AndRRI8, 0xf0000000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "and", "!0C, !1C, #!2m", 4), + "and", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2OrrRRI8, 0xf0400000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "orr", "!0C, !1C, #!2m", 4), + "orr", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2EorRRI8, 0xf0800000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "eor", "!0C, !1C, #!2m", 4), + "eor", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2AddRRI8, 0xf1100000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "adds", "!0C, !1C, #!2m", 4), + "adds", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2AdcRRI8, 0xf1500000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES, - "adcs", "!0C, !1C, #!2m", 4), + "adcs", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2SubRRI8, 0xf1b00000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "subs", "!0C, !1C, #!2m", 4), + "subs", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2SbcRRI8, 0xf1700000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES, - "sbcs", "!0C, !1C, #!2m", 4), + "sbcs", "!0C, !1C, #!2m", 4, kFixupNone), + ENCODING_MAP(kThumb2RevRR, 0xfa90f080, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12, // Binary, but rm is stored twice. + "rev", "!0C, !1C", 4, kFixupNone), + ENCODING_MAP(kThumb2RevshRR, 0xfa90f0b0, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12, // Binary, but rm is stored twice. 
+ "revsh", "!0C, !1C", 4, kFixupNone), ENCODING_MAP(kThumb2It, 0xbf00, kFmtBitBlt, 7, 4, kFmtBitBlt, 3, 0, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | IS_IT | USES_CCODES, - "it:!1b", "!0c", 2), + "it:!1b", "!0c", 2, kFixupNone), ENCODING_MAP(kThumb2Fmstat, 0xeef1fa10, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES, - "fmstat", "", 4), + "fmstat", "", 4, kFixupNone), ENCODING_MAP(kThumb2Vcmpd, 0xeeb40b40, kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, - "vcmp.f64", "!0S, !1S", 4), + "vcmp.f64", "!0S, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2Vcmps, 0xeeb40a40, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, - "vcmp.f32", "!0s, !1s", 4), + "vcmp.f32", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2LdrPcRel12, 0xf8df0000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, - "ldr", "!0C, [r15pc, #!1d]", 4), + "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad), ENCODING_MAP(kThumb2BCond, 0xf0008000, kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES | NEEDS_FIXUP, - "b!1c", "!0t", 4), + "b!1c", "!0t", 4, kFixupCondBranch), ENCODING_MAP(kThumb2Vmovd_RR, 0xeeb00b40, kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vmov.f64", "!0S, !1S", 4), + "vmov.f64", "!0S, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2Vmovs_RR, 0xeeb00a40, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vmov.f32", "!0s, !1s", 4), + "vmov.f32", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2Fmrs, 0xee100a10, kFmtBitBlt, 15, 12, kFmtSfp, 7, 16, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "fmrs", "!0C, !1s", 4), + "fmrs", "!0C, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2Fmsr, 0xee000a10, kFmtSfp, 7, 16, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "fmsr", "!0s, !1C", 4), + "fmsr", "!0s, !1C", 4, kFixupNone), ENCODING_MAP(kThumb2Fmrrd, 0xec500b10, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtDfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2, - "fmrrd", "!0C, !1C, !2S", 4), + "fmrrd", "!0C, !1C, !2S", 4, kFixupNone), ENCODING_MAP(kThumb2Fmdrr, 0xec400b10, kFmtDfp, 5, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "fmdrr", "!0S, !1C, !2C", 4), + "fmdrr", "!0S, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2Vabsd, 0xeeb00bc0, kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vabs.f64", "!0S, !1S", 4), + "vabs.f64", "!0S, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2Vabss, 0xeeb00ac0, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vabs.f32", "!0s, !1s", 4), + "vabs.f32", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2Vnegd, 0xeeb10b40, kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vneg.f64", "!0S, !1S", 4), + "vneg.f64", "!0S, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2Vnegs, 0xeeb10a40, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vneg.f32", "!0s, !1s", 4), + "vneg.f32", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2Vmovs_IMM8, 0xeeb00a00, 
kFmtSfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "vmov.f32", "!0s, #0x!1h", 4), + "vmov.f32", "!0s, #0x!1h", 4, kFixupNone), ENCODING_MAP(kThumb2Vmovd_IMM8, 0xeeb00b00, kFmtDfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "vmov.f64", "!0S, #0x!1h", 4), + "vmov.f64", "!0S, #0x!1h", 4, kFixupNone), ENCODING_MAP(kThumb2Mla, 0xfb000000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0 | REG_USE1 | REG_USE2 | REG_USE3, - "mla", "!0C, !1C, !2C, !3C", 4), + "mla", "!0C, !1C, !2C, !3C", 4, kFixupNone), ENCODING_MAP(kThumb2Umull, 0xfba00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3, - "umull", "!0C, !1C, !2C, !3C", 4), + "umull", "!0C, !1C, !2C, !3C", 4, kFixupNone), ENCODING_MAP(kThumb2Ldrex, 0xe8500f00, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrex", "!0C, [!1C, #!2E]", 4), + "ldrex", "!0C, [!1C, #!2E]", 4, kFixupNone), ENCODING_MAP(kThumb2Strex, 0xe8400000, kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE, - "strex", "!0C,!1C, [!2C, #!2E]", 4), + "strex", "!0C,!1C, [!2C, #!2E]", 4, kFixupNone), ENCODING_MAP(kThumb2Clrex, 0xf3bf8f2f, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, NO_OPERAND, - "clrex", "", 4), + "clrex", "", 4, kFixupNone), ENCODING_MAP(kThumb2Bfi, 0xf3600000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, - "bfi", "!0C,!1C,#!2d,#!3d", 4), + "bfi", "!0C,!1C,#!2d,#!3d", 4, kFixupNone), ENCODING_MAP(kThumb2Bfc, 0xf36f0000, kFmtBitBlt, 11, 8, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0, - "bfc", "!0C,#!1d,#!2d", 4), + "bfc", "!0C,#!1d,#!2d", 4, kFixupNone), ENCODING_MAP(kThumb2Dmb, 0xf3bf8f50, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP, - "dmb", "#!0B", 4), + "dmb", "#!0B", 4, kFixupNone), ENCODING_MAP(kThumb2LdrPcReln12, 0xf85f0000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD, - "ldr", "!0C, [r15pc, -#!1d]", 4), + "ldr", "!0C, [r15pc, -#!1d]", 4, kFixupNone), ENCODING_MAP(kThumb2Stm, 0xe9000000, kFmtBitBlt, 19, 16, kFmtBitBlt, 12, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_USE_LIST1 | IS_STORE, - "stm", "!0C, <!1R>", 4), + "stm", "!0C, <!1R>", 4, kFixupNone), ENCODING_MAP(kThumbUndefined, 0xde00, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, NO_OPERAND, - "undefined", "", 2), + "undefined", "", 2, kFixupNone), // NOTE: vpop, vpush hard-encoded for s16+ reg list ENCODING_MAP(kThumb2VPopCS, 0xecbd8a00, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_FPCS_LIST0 - | IS_LOAD, "vpop", "<!0P>", 4), + | IS_LOAD, "vpop", "<!0P>", 4, kFixupNone), ENCODING_MAP(kThumb2VPushCS, 0xed2d8a00, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_FPCS_LIST0 - | IS_STORE, "vpush", "<!0P>", 4), + | IS_STORE, "vpush", "<!0P>", 4, kFixupNone), ENCODING_MAP(kThumb2Vldms, 0xec900a00, kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0, kFmtUnused, -1, 
-1, IS_TERTIARY_OP | REG_USE0 | REG_DEF_FPCS_LIST2 - | IS_LOAD, "vldms", "!0C, <!2Q>", 4), + | IS_LOAD, "vldms", "!0C, <!2Q>", 4, kFixupNone), ENCODING_MAP(kThumb2Vstms, 0xec800a00, kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_FPCS_LIST2 - | IS_STORE, "vstms", "!0C, <!2Q>", 4), + | IS_STORE, "vstms", "!0C, <!2Q>", 4, kFixupNone), ENCODING_MAP(kThumb2BUncond, 0xf0009000, kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH, - "b", "!0t", 4), + "b", "!0t", 4, kFixupT2Branch), ENCODING_MAP(kThumb2MovImm16H, 0xf2c00000, kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0, - "movt", "!0C, #!1M", 4), + "movt", "!0C, #!1M", 4, kFixupNone), ENCODING_MAP(kThumb2AddPCR, 0x4487, kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - IS_UNARY_OP | REG_USE0 | IS_BRANCH, - "add", "rPC, !0C", 2), + IS_UNARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP, + "add", "rPC, !0C", 2, kFixupLabel), ENCODING_MAP(kThumb2Adr, 0xf20f0000, kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, /* Note: doesn't affect flags */ IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP, - "adr", "!0C,#!1d", 4), + "adr", "!0C,#!1d", 4, kFixupAdr), ENCODING_MAP(kThumb2MovImm16LST, 0xf2400000, kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP, - "mov", "!0C, #!1M", 4), + "mov", "!0C, #!1M", 4, kFixupMovImmLST), ENCODING_MAP(kThumb2MovImm16HST, 0xf2c00000, kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP, - "movt", "!0C, #!1M", 4), + "movt", "!0C, #!1M", 4, kFixupMovImmHST), ENCODING_MAP(kThumb2LdmiaWB, 0xe8b00000, kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, - "ldmia", "!0C!!, <!1R>", 4), + "ldmia", "!0C!!, <!1R>", 4, kFixupNone), ENCODING_MAP(kThumb2SubsRRI12, 0xf1b00000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "subs", "!0C,!1C,#!2d", 4), + "subs", "!0C,!1C,#!2d", 4, kFixupNone), ENCODING_MAP(kThumb2OrrRRRs, 0xea500000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "orrs", "!0C, !1C, !2C!3H", 4), + "orrs", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2Push1, 0xf84d0d04, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE0 - | IS_STORE, "push1", "!0C", 4), + | IS_STORE, "push1", "!0C", 4, kFixupNone), ENCODING_MAP(kThumb2Pop1, 0xf85d0b04, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF0 - | IS_LOAD, "pop1", "!0C", 4), + | IS_LOAD, "pop1", "!0C", 4, kFixupNone), ENCODING_MAP(kThumb2RsubRRR, 0xebd00000, /* setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "rsbs", "!0C, !1C, !2C!3H", 4), + "rsbs", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2Smull, 0xfb800000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3, - "smull", "!0C, !1C, !2C, !3C", 4), + "smull", "!0C, !1C, !2C, !3C", 4, kFixupNone), 
ENCODING_MAP(kThumb2LdrdPcRel8, 0xe9df0000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, - "ldrd", "!0C, !1C, [pc, #!2E]", 4), + "ldrd", "!0C, !1C, [pc, #!2E]", 4, kFixupLoad), ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD, - "ldrd", "!0C, !1C, [!2C, #!3E]", 4), + "ldrd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone), ENCODING_MAP(kThumb2StrdI8, 0xe9c00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE, - "strd", "!0C, !1C, [!2C, #!3E]", 4), + "strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone), }; +// new_lir replaces orig_lir in the pcrel_fixup list. +void ArmMir2Lir::ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) { + new_lir->u.a.pcrel_next = orig_lir->u.a.pcrel_next; + if (UNLIKELY(prev_lir == NULL)) { + first_fixup_ = new_lir; + } else { + prev_lir->u.a.pcrel_next = new_lir; + } + orig_lir->flags.fixup = kFixupNone; +} + +// new_lir is inserted before orig_lir in the pcrel_fixup list. +void ArmMir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) { + new_lir->u.a.pcrel_next = orig_lir; + if (UNLIKELY(prev_lir == NULL)) { + first_fixup_ = new_lir; + } else { + DCHECK(prev_lir->u.a.pcrel_next == orig_lir); + prev_lir->u.a.pcrel_next = new_lir; + } +} + /* * The fake NOP of moving r0 to r0 actually will incur data stalls if r0 is * not ready. Since r5FP is not updated often, it is less likely to @@ -997,398 +1047,641 @@ const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = { */ #define PADDING_MOV_R5_R5 0x1C2D -/* - * Assemble the LIR into binary instruction format. Note that we may - * discover that pc-relative displacements may not fit the selected - * instruction. - */ -AssemblerStatus ArmMir2Lir::AssembleInstructions(uintptr_t start_addr) { - LIR* lir; - AssemblerStatus res = kSuccess; // Assume success - - for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { - if (lir->opcode < 0) { - /* 1 means padding is needed */ - if ((lir->opcode == kPseudoPseudoAlign4) && (lir->operands[0] == 1)) { - code_buffer_.push_back(PADDING_MOV_R5_R5 & 0xFF); - code_buffer_.push_back((PADDING_MOV_R5_R5 >> 8) & 0xFF); +void ArmMir2Lir::EncodeLIR(LIR* lir) { + int opcode = lir->opcode; + if (IsPseudoLirOp(opcode)) { + if (UNLIKELY(opcode == kPseudoPseudoAlign4)) { + // Note: size for this opcode will be either 0 or 2 depending on final alignment. + lir->u.a.bytes[0] = (PADDING_MOV_R5_R5 & 0xff); + lir->u.a.bytes[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff); + lir->flags.size = (lir->offset & 0x2); + } + } else if (LIKELY(!lir->flags.is_nop)) { + const ArmEncodingMap *encoder = &EncodingMap[lir->opcode]; + uint32_t bits = encoder->skeleton; + for (int i = 0; i < 4; i++) { + uint32_t operand; + uint32_t value; + operand = lir->operands[i]; + ArmEncodingKind kind = encoder->field_loc[i].kind; + if (LIKELY(kind == kFmtBitBlt)) { + value = (operand << encoder->field_loc[i].start) & + ((1 << (encoder->field_loc[i].end + 1)) - 1); + bits |= value; + } else { + switch (encoder->field_loc[i].kind) { + case kFmtSkip: + break; // Nothing to do, but continue to next. + case kFmtUnused: + i = 4; // Done, break out of the enclosing loop. 
+ break; + case kFmtFPImm: + value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end; + value |= (operand & 0x0F) << encoder->field_loc[i].start; + bits |= value; + break; + case kFmtBrOffset: + value = ((operand & 0x80000) >> 19) << 26; + value |= ((operand & 0x40000) >> 18) << 11; + value |= ((operand & 0x20000) >> 17) << 13; + value |= ((operand & 0x1f800) >> 11) << 16; + value |= (operand & 0x007ff); + bits |= value; + break; + case kFmtShift5: + value = ((operand & 0x1c) >> 2) << 12; + value |= (operand & 0x03) << 6; + bits |= value; + break; + case kFmtShift: + value = ((operand & 0x70) >> 4) << 12; + value |= (operand & 0x0f) << 4; + bits |= value; + break; + case kFmtBWidth: + value = operand - 1; + bits |= value; + break; + case kFmtLsb: + value = ((operand & 0x1c) >> 2) << 12; + value |= (operand & 0x03) << 6; + bits |= value; + break; + case kFmtImm6: + value = ((operand & 0x20) >> 5) << 9; + value |= (operand & 0x1f) << 3; + bits |= value; + break; + case kFmtDfp: { + DCHECK(ARM_DOUBLEREG(operand)); + DCHECK_EQ((operand & 0x1), 0U); + uint32_t reg_name = (operand & ARM_FP_REG_MASK) >> 1; + /* Snag the 1-bit slice and position it */ + value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end; + /* Extract and position the 4-bit slice */ + value |= (reg_name & 0x0f) << encoder->field_loc[i].start; + bits |= value; + break; + } + case kFmtSfp: + DCHECK(ARM_SINGLEREG(operand)); + /* Snag the 1-bit slice and position it */ + value = (operand & 0x1) << encoder->field_loc[i].end; + /* Extract and position the 4-bit slice */ + value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start; + bits |= value; + break; + case kFmtImm12: + case kFmtModImm: + value = ((operand & 0x800) >> 11) << 26; + value |= ((operand & 0x700) >> 8) << 12; + value |= operand & 0x0ff; + bits |= value; + break; + case kFmtImm16: + value = ((operand & 0x0800) >> 11) << 26; + value |= ((operand & 0xf000) >> 12) << 16; + value |= ((operand & 0x0700) >> 8) << 12; + value |= operand & 0x0ff; + bits |= value; + break; + case kFmtOff24: { + uint32_t signbit = (operand >> 31) & 0x1; + uint32_t i1 = (operand >> 22) & 0x1; + uint32_t i2 = (operand >> 21) & 0x1; + uint32_t imm10 = (operand >> 11) & 0x03ff; + uint32_t imm11 = operand & 0x07ff; + uint32_t j1 = (i1 ^ signbit) ? 0 : 1; + uint32_t j2 = (i2 ^ signbit) ? 0 : 1; + value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | + imm11; + bits |= value; + } + break; + default: + LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind; + } } - continue; } - - if (lir->flags.is_nop) { - continue; + if (encoder->size == 4) { + lir->u.a.bytes[0] = ((bits >> 16) & 0xff); + lir->u.a.bytes[1] = ((bits >> 24) & 0xff); + lir->u.a.bytes[2] = (bits & 0xff); + lir->u.a.bytes[3] = ((bits >> 8) & 0xff); + } else { + DCHECK_EQ(encoder->size, 2); + lir->u.a.bytes[0] = (bits & 0xff); + lir->u.a.bytes[1] = ((bits >> 8) & 0xff); } + lir->flags.size = encoder->size; + } +} - /* - * For PC-relative displacements we won't know if the - * selected instruction will work until late (i.e. - now). - * If something doesn't fit, we must replace the short-form - * operation with a longer-form one. Note, though, that this - * can change code we've already processed, so we'll need to - * re-calculate offsets and restart. To limit the number of - * restarts, the entire list will be scanned and patched. - * Of course, the patching itself may cause new overflows so this - * is an iterative process. 
- */ - if (lir->flags.pcRelFixup) { - if (lir->opcode == kThumbLdrPcRel || - lir->opcode == kThumb2LdrPcRel12 || - lir->opcode == kThumbAddPcRel || - lir->opcode == kThumb2LdrdPcRel8 || - ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) || - ((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) { - /* - * PC-relative loads are mostly used to load immediates - * that are too large to materialize directly in one shot. - * However, if the load displacement exceeds the limit, - * we revert to a multiple-instruction materialization sequence. - */ - LIR *lir_target = lir->target; - uintptr_t pc = (lir->offset + 4) & ~3; - uintptr_t target = lir_target->offset; - int delta = target - pc; - if (delta & 0x3) { - LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta; - } - // First, a sanity check for cases we shouldn't see now - if (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) || - ((lir->opcode == kThumbLdrPcRel) && (delta > 1020))) { - // Shouldn't happen in current codegen. - LOG(FATAL) << "Unexpected pc-rel offset " << delta; - } - // Now, check for the difficult cases - if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) || - ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) || - ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) || - ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) { +// Assemble the LIR into binary instruction format. +void ArmMir2Lir::AssembleLIR() { + LIR* lir; + LIR* prev_lir; + cu_->NewTimingSplit("Assemble"); + int assembler_retries = 0; + CodeOffset starting_offset = EncodeRange(first_lir_insn_, last_lir_insn_, 0); + data_offset_ = (starting_offset + 0x3) & ~0x3; + int32_t offset_adjustment; + AssignDataOffsets(); + + /* + * Note: generation must be 1 on first pass (to distinguish from initialized state of 0 for non-visited nodes). + * Start at zero here, and bit will be flipped to 1 on entry to the loop. + */ + int generation = 0; + while (true) { + offset_adjustment = 0; + AssemblerStatus res = kSuccess; // Assume success + generation ^= 1; + // Note: nodes requring possible fixup linked in ascending order. + lir = first_fixup_; + prev_lir = NULL; + while (lir != NULL) { + /* + * NOTE: the lir being considered here will be encoded following the switch (so long as + * we're not in a retry situation). However, any new non-pc_rel instructions inserted + * due to retry must be explicitly encoded at the time of insertion. Note that + * inserted instructions don't need use/def flags, but do need size and pc-rel status + * properly updated. + */ + lir->offset += offset_adjustment; + // During pass, allows us to tell whether a node has been updated with offset_adjustment yet. + lir->flags.generation = generation; + switch (static_cast<FixupKind>(lir->flags.fixup)) { + case kFixupLabel: + case kFixupNone: + break; + case kFixupVLoad: + if (lir->operands[1] != r15pc) { + break; + } + // NOTE: intentional fallthrough. + case kFixupLoad: { /* - * Note: because rARM_LR may be used to fix up out-of-range - * vldrs/vldrd we include REG_DEF_LR in the resource - * masks for these instructions. + * PC-relative loads are mostly used to load immediates + * that are too large to materialize directly in one shot. + * However, if the load displacement exceeds the limit, + * we revert to a multiple-instruction materialization sequence. */ - int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) || (lir->opcode == kThumb2LdrPcRel12)) - ? 
lir->operands[0] : rARM_LR; + LIR *lir_target = lir->target; + CodeOffset pc = (lir->offset + 4) & ~3; + CodeOffset target = lir_target->offset + + ((lir_target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + int32_t delta = target - pc; + if (res != kSuccess) { + /* + * In this case, we're just estimating and will do it again for real. Ensure offset + * is legal. + */ + delta &= ~0x3; + } + DCHECK_EQ((delta & 0x3), 0); + // First, a sanity check for cases we shouldn't see now + if (kIsDebugBuild && (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) || + ((lir->opcode == kThumbLdrPcRel) && (delta > 1020)))) { + // Shouldn't happen in current codegen. + LOG(FATAL) << "Unexpected pc-rel offset " << delta; + } + // Now, check for the difficult cases + if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) || + ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) || + ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) || + ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) { + /* + * Note: The reason vldrs/vldrd include rARM_LR in their use/def masks is that we + * sometimes have to use it to fix up out-of-range accesses. This is where that + * happens. + */ + int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) || + (lir->opcode == kThumb2LdrPcRel12)) ? lir->operands[0] : rARM_LR; - // Add new Adr to generate the address. - LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr, - base_reg, 0, 0, 0, 0, lir->target); - InsertLIRBefore(lir, new_adr); + // Add new Adr to generate the address. + LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr, + base_reg, 0, 0, 0, 0, lir->target); + new_adr->offset = lir->offset; + new_adr->flags.fixup = kFixupAdr; + new_adr->flags.size = EncodingMap[kThumb2Adr].size; + InsertLIRBefore(lir, new_adr); + lir->offset += new_adr->flags.size; + offset_adjustment += new_adr->flags.size; - // Convert to normal load. - if (lir->opcode == kThumb2LdrPcRel12) { - lir->opcode = kThumb2LdrRRI12; - } else if (lir->opcode == kThumb2LdrdPcRel8) { - lir->opcode = kThumb2LdrdI8; - } - // Change the load to be relative to the new Adr base. - if (lir->opcode == kThumb2LdrdI8) { - lir->operands[3] = 0; - lir->operands[2] = base_reg; + // lir no longer pcrel, unlink and link in new_adr. + ReplaceFixup(prev_lir, lir, new_adr); + + // Convert to normal load. + offset_adjustment -= lir->flags.size; + if (lir->opcode == kThumb2LdrPcRel12) { + lir->opcode = kThumb2LdrRRI12; + } else if (lir->opcode == kThumb2LdrdPcRel8) { + lir->opcode = kThumb2LdrdI8; + } + lir->flags.size = EncodingMap[lir->opcode].size; + offset_adjustment += lir->flags.size; + // Change the load to be relative to the new Adr base. + if (lir->opcode == kThumb2LdrdI8) { + lir->operands[3] = 0; + lir->operands[2] = base_reg; + } else { + lir->operands[2] = 0; + lir->operands[1] = base_reg; + } + // Must redo encoding here - won't ever revisit this node. + EncodeLIR(lir); + prev_lir = new_adr; // Continue scan with new_adr; + lir = new_adr->u.a.pcrel_next; + res = kRetryAll; + continue; } else { - lir->operands[2] = 0; - lir->operands[1] = base_reg; + if ((lir->opcode == kThumb2Vldrs) || + (lir->opcode == kThumb2Vldrd) || + (lir->opcode == kThumb2LdrdPcRel8)) { + lir->operands[2] = delta >> 2; + } else { + lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ? 
delta : + delta >> 2; + } } - SetupResourceMasks(lir); - res = kRetryAll; - } else { - if ((lir->opcode == kThumb2Vldrs) || - (lir->opcode == kThumb2Vldrd) || - (lir->opcode == kThumb2LdrdPcRel8)) { - lir->operands[2] = delta >> 2; + break; + } + case kFixupCBxZ: { + LIR *target_lir = lir->target; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset + + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + int32_t delta = target - pc; + if (delta > 126 || delta < 0) { + /* + * Convert to cmp rx,#0 / b[eq/ne] tgt pair + * Make new branch instruction and insert after + */ + LIR* new_inst = + RawLIR(lir->dalvik_offset, kThumbBCond, 0, + (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe, + 0, 0, 0, lir->target); + InsertLIRAfter(lir, new_inst); + + /* Convert the cb[n]z to a cmp rx, #0 ] */ + // Subtract the old size. + offset_adjustment -= lir->flags.size; + lir->opcode = kThumbCmpRI8; + /* operand[0] is src1 in both cb[n]z & CmpRI8 */ + lir->operands[1] = 0; + lir->target = 0; + EncodeLIR(lir); // NOTE: sets flags.size. + // Add back the new size. + DCHECK_EQ(lir->flags.size, static_cast<uint32_t>(EncodingMap[lir->opcode].size)); + offset_adjustment += lir->flags.size; + // Set up the new following inst. + new_inst->offset = lir->offset + lir->flags.size; + new_inst->flags.fixup = kFixupCondBranch; + new_inst->flags.size = EncodingMap[new_inst->opcode].size; + offset_adjustment += new_inst->flags.size; + + // lir no longer pcrel, unlink and link in new_inst. + ReplaceFixup(prev_lir, lir, new_inst); + prev_lir = new_inst; // Continue with the new instruction. + lir = new_inst->u.a.pcrel_next; + res = kRetryAll; + continue; } else { - lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ? delta : - delta >> 2; + lir->operands[1] = delta >> 1; } + break; } - } else if (lir->opcode == kThumb2Cbnz || lir->opcode == kThumb2Cbz) { - LIR *target_lir = lir->target; - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; - int delta = target - pc; - if (delta > 126 || delta < 0) { - /* - * Convert to cmp rx,#0 / b[eq/ne] tgt pair - * Make new branch instruction and insert after - */ - LIR* new_inst = - RawLIR(lir->dalvik_offset, kThumbBCond, 0, - (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe, - 0, 0, 0, lir->target); - InsertLIRAfter(lir, new_inst); - /* Convert the cb[n]z to a cmp rx, #0 ] */ - lir->opcode = kThumbCmpRI8; - /* operand[0] is src1 in both cb[n]z & CmpRI8 */ - lir->operands[1] = 0; - lir->target = 0; - SetupResourceMasks(lir); - res = kRetryAll; - } else { - lir->operands[1] = delta >> 1; - } - } else if (lir->opcode == kThumb2Push || lir->opcode == kThumb2Pop) { - if (__builtin_popcount(lir->operands[0]) == 1) { - /* - * The standard push/pop multiple instruction - * requires at least two registers in the list. - * If we've got just one, switch to the single-reg - * encoding. - */ - lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 : - kThumb2Pop1; - int reg = 0; - while (lir->operands[0]) { - if (lir->operands[0] & 0x1) { - break; - } else { - reg++; - lir->operands[0] >>= 1; + case kFixupPushPop: { + if (__builtin_popcount(lir->operands[0]) == 1) { + /* + * The standard push/pop multiple instruction + * requires at least two registers in the list. + * If we've got just one, switch to the single-reg + * encoding. + */ + lir->opcode = (lir->opcode == kThumb2Push) ? 
kThumb2Push1 : + kThumb2Pop1; + int reg = 0; + while (lir->operands[0]) { + if (lir->operands[0] & 0x1) { + break; + } else { + reg++; + lir->operands[0] >>= 1; + } } + lir->operands[0] = reg; + // This won't change again, don't bother unlinking, just reset fixup kind + lir->flags.fixup = kFixupNone; } - lir->operands[0] = reg; - SetupResourceMasks(lir); - res = kRetryAll; - } - } else if (lir->opcode == kThumbBCond || lir->opcode == kThumb2BCond) { - LIR *target_lir = lir->target; - int delta = 0; - DCHECK(target_lir); - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; - delta = target - pc; - if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) { - lir->opcode = kThumb2BCond; - SetupResourceMasks(lir); - res = kRetryAll; + break; } - lir->operands[0] = delta >> 1; - } else if (lir->opcode == kThumb2BUncond) { - LIR *target_lir = lir->target; - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; - int delta = target - pc; - lir->operands[0] = delta >> 1; - if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && - lir->operands[0] == 0) { // Useless branch - lir->flags.is_nop = true; - res = kRetryAll; + case kFixupCondBranch: { + LIR *target_lir = lir->target; + int32_t delta = 0; + DCHECK(target_lir); + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset + + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + delta = target - pc; + if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) { + offset_adjustment -= lir->flags.size; + lir->opcode = kThumb2BCond; + lir->flags.size = EncodingMap[lir->opcode].size; + // Fixup kind remains the same. + offset_adjustment += lir->flags.size; + res = kRetryAll; + } + lir->operands[0] = delta >> 1; + break; } - } else if (lir->opcode == kThumbBUncond) { - LIR *target_lir = lir->target; - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; - int delta = target - pc; - if (delta > 2046 || delta < -2048) { - // Convert to Thumb2BCond w/ kArmCondAl - lir->opcode = kThumb2BUncond; - lir->operands[0] = 0; - SetupResourceMasks(lir); - res = kRetryAll; - } else { + case kFixupT2Branch: { + LIR *target_lir = lir->target; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset + + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + int32_t delta = target - pc; lir->operands[0] = delta >> 1; - if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && - lir->operands[0] == -1) { // Useless branch + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) { + // Useless branch + offset_adjustment -= lir->flags.size; lir->flags.is_nop = true; + // Don't unlink - just set to do-nothing. + lir->flags.fixup = kFixupNone; + res = kRetryAll; + } + break; + } + case kFixupT1Branch: { + LIR *target_lir = lir->target; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset + + ((target_lir->flags.generation == lir->flags.generation) ? 
0 : offset_adjustment); + int32_t delta = target - pc; + if (delta > 2046 || delta < -2048) { + // Convert to Thumb2BCond w/ kArmCondAl + offset_adjustment -= lir->flags.size; + lir->opcode = kThumb2BUncond; + lir->operands[0] = 0; + lir->flags.size = EncodingMap[lir->opcode].size; + lir->flags.fixup = kFixupT2Branch; + offset_adjustment += lir->flags.size; res = kRetryAll; + } else { + lir->operands[0] = delta >> 1; + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == -1) { + // Useless branch + offset_adjustment -= lir->flags.size; + lir->flags.is_nop = true; + // Don't unlink - just set to do-nothing. + lir->flags.fixup = kFixupNone; + res = kRetryAll; + } } + break; } - } else if (lir->opcode == kThumbBlx1) { - DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2); - /* cur_pc is Thumb */ - uintptr_t cur_pc = (start_addr + lir->offset + 4) & ~3; - uintptr_t target = lir->operands[1]; + case kFixupBlx1: { + DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2); + /* cur_pc is Thumb */ + CodeOffset cur_pc = (lir->offset + 4) & ~3; + CodeOffset target = lir->operands[1]; - /* Match bit[1] in target with base */ - if (cur_pc & 0x2) { - target |= 0x2; - } - int delta = target - cur_pc; - DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); + /* Match bit[1] in target with base */ + if (cur_pc & 0x2) { + target |= 0x2; + } + int32_t delta = target - cur_pc; + DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); - lir->operands[0] = (delta >> 12) & 0x7ff; - NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; - } else if (lir->opcode == kThumbBl1) { - DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2); - /* Both cur_pc and target are Thumb */ - uintptr_t cur_pc = start_addr + lir->offset + 4; - uintptr_t target = lir->operands[1]; + lir->operands[0] = (delta >> 12) & 0x7ff; + NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; + break; + } + case kFixupBl1: { + DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2); + /* Both cur_pc and target are Thumb */ + CodeOffset cur_pc = lir->offset + 4; + CodeOffset target = lir->operands[1]; - int delta = target - cur_pc; - DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); + int32_t delta = target - cur_pc; + DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); - lir->operands[0] = (delta >> 12) & 0x7ff; - NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; - } else if (lir->opcode == kThumb2Adr) { - SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[2]); - LIR* target = lir->target; - int target_disp = tab_rec ? tab_rec->offset - : target->offset; - int disp = target_disp - ((lir->offset + 4) & ~3); - if (disp < 4096) { - lir->operands[1] = disp; - } else { - // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0] - // TUNING: if this case fires often, it can be improved. Not expected to be common. 
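The conversion described in the comment above replaces an out-of-range adr with a movw/movt pair (kThumb2MovImm16LST/HST) followed by an add against pc; the essential arithmetic is just splitting the displacement into 16-bit halves, which the kFixupMovImmLST/HST cases later patch into the operands. A small sketch of that split, using a made-up displacement:

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical pc-relative displacement too large for kThumb2Adr (limit is about 4095).
  uint32_t disp = 0x12345u;
  uint16_t lo = disp & 0xffffu;          // value the MovImm16LST fixup patches in
  uint16_t hi = (disp >> 16) & 0xffffu;  // value the MovImm16HST fixup patches in
  std::printf("movw rd, #0x%04x ; movt rd, #0x%04x ; add rd, pc\n", lo, hi);
  return 0;
}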
- LIR *new_mov16L = - RawLIR(lir->dalvik_offset, kThumb2MovImm16LST, - lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir), - reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target); - InsertLIRBefore(lir, new_mov16L); - LIR *new_mov16H = - RawLIR(lir->dalvik_offset, kThumb2MovImm16HST, - lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir), - reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target); - InsertLIRBefore(lir, new_mov16H); - if (ARM_LOWREG(lir->operands[0])) { - lir->opcode = kThumbAddRRLH; + lir->operands[0] = (delta >> 12) & 0x7ff; + NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; + break; + } + case kFixupAdr: { + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2])); + LIR* target = lir->target; + int32_t target_disp = (tab_rec != NULL) ? tab_rec->offset + offset_adjustment + : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + int32_t disp = target_disp - ((lir->offset + 4) & ~3); + if (disp < 4096) { + lir->operands[1] = disp; } else { - lir->opcode = kThumbAddRRHH; + // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0] + // TUNING: if this case fires often, it can be improved. Not expected to be common. + LIR *new_mov16L = + RawLIR(lir->dalvik_offset, kThumb2MovImm16LST, lir->operands[0], 0, + WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target); + new_mov16L->flags.size = EncodingMap[new_mov16L->opcode].size; + new_mov16L->flags.fixup = kFixupMovImmLST; + new_mov16L->offset = lir->offset; + // Link the new instruction, retaining lir. + InsertLIRBefore(lir, new_mov16L); + lir->offset += new_mov16L->flags.size; + offset_adjustment += new_mov16L->flags.size; + InsertFixupBefore(prev_lir, lir, new_mov16L); + prev_lir = new_mov16L; // Now we've got a new prev. + LIR *new_mov16H = + RawLIR(lir->dalvik_offset, kThumb2MovImm16HST, lir->operands[0], 0, + WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target); + new_mov16H->flags.size = EncodingMap[new_mov16H->opcode].size; + new_mov16H->flags.fixup = kFixupMovImmHST; + new_mov16H->offset = lir->offset; + // Link the new instruction, retaining lir. + InsertLIRBefore(lir, new_mov16H); + lir->offset += new_mov16H->flags.size; + offset_adjustment += new_mov16H->flags.size; + InsertFixupBefore(prev_lir, lir, new_mov16H); + prev_lir = new_mov16H; // Now we've got a new prev. + + offset_adjustment -= lir->flags.size; + if (ARM_LOWREG(lir->operands[0])) { + lir->opcode = kThumbAddRRLH; + } else { + lir->opcode = kThumbAddRRHH; + } + lir->operands[1] = rARM_PC; + lir->flags.size = EncodingMap[lir->opcode].size; + offset_adjustment += lir->flags.size; + // Must stay in fixup list and have offset updated; will be used by LST/HSP pair. + lir->flags.fixup = kFixupNone; + res = kRetryAll; } - lir->operands[1] = rARM_PC; - SetupResourceMasks(lir); - res = kRetryAll; - } - } else if (lir->opcode == kThumb2MovImm16LST) { - // operands[1] should hold disp, [2] has add, [3] has tab_rec - LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]); - SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); - // If tab_rec is null, this is a literal load. Use target - LIR* target = lir->target; - int target_disp = tab_rec ? 
tab_rec->offset : target->offset; - lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff; - } else if (lir->opcode == kThumb2MovImm16HST) { - // operands[1] should hold disp, [2] has add, [3] has tab_rec - LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]); - SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); - // If tab_rec is null, this is a literal load. Use target - LIR* target = lir->target; - int target_disp = tab_rec ? tab_rec->offset : target->offset; - lir->operands[1] = - ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff; - } - } - /* - * If one of the pc-relative instructions expanded we'll have - * to make another pass. Don't bother to fully assemble the - * instruction. - */ - if (res != kSuccess) { - continue; - } - const ArmEncodingMap *encoder = &EncodingMap[lir->opcode]; - uint32_t bits = encoder->skeleton; - int i; - for (i = 0; i < 4; i++) { - uint32_t operand; - uint32_t value; - operand = lir->operands[i]; - switch (encoder->field_loc[i].kind) { - case kFmtUnused: - break; - case kFmtFPImm: - value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end; - value |= (operand & 0x0F) << encoder->field_loc[i].start; - bits |= value; - break; - case kFmtBrOffset: - value = ((operand & 0x80000) >> 19) << 26; - value |= ((operand & 0x40000) >> 18) << 11; - value |= ((operand & 0x20000) >> 17) << 13; - value |= ((operand & 0x1f800) >> 11) << 16; - value |= (operand & 0x007ff); - bits |= value; - break; - case kFmtShift5: - value = ((operand & 0x1c) >> 2) << 12; - value |= (operand & 0x03) << 6; - bits |= value; - break; - case kFmtShift: - value = ((operand & 0x70) >> 4) << 12; - value |= (operand & 0x0f) << 4; - bits |= value; - break; - case kFmtBWidth: - value = operand - 1; - bits |= value; - break; - case kFmtLsb: - value = ((operand & 0x1c) >> 2) << 12; - value |= (operand & 0x03) << 6; - bits |= value; - break; - case kFmtImm6: - value = ((operand & 0x20) >> 5) << 9; - value |= (operand & 0x1f) << 3; - bits |= value; - break; - case kFmtBitBlt: - value = (operand << encoder->field_loc[i].start) & - ((1 << (encoder->field_loc[i].end + 1)) - 1); - bits |= value; - break; - case kFmtDfp: { - DCHECK(ARM_DOUBLEREG(operand)); - DCHECK_EQ((operand & 0x1), 0U); - int reg_name = (operand & ARM_FP_REG_MASK) >> 1; - /* Snag the 1-bit slice and position it */ - value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end; - /* Extract and position the 4-bit slice */ - value |= (reg_name & 0x0f) << encoder->field_loc[i].start; - bits |= value; break; } - case kFmtSfp: - DCHECK(ARM_SINGLEREG(operand)); - /* Snag the 1-bit slice and position it */ - value = (operand & 0x1) << encoder->field_loc[i].end; - /* Extract and position the 4-bit slice */ - value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start; - bits |= value; + case kFixupMovImmLST: { + // operands[1] should hold disp, [2] has add, [3] has tab_rec + LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])); + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); + // If tab_rec is null, this is a literal load. Use target + LIR* target = lir->target; + int32_t target_disp = tab_rec ? 
tab_rec->offset : target->offset; + lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff; break; - case kFmtImm12: - case kFmtModImm: - value = ((operand & 0x800) >> 11) << 26; - value |= ((operand & 0x700) >> 8) << 12; - value |= operand & 0x0ff; - bits |= value; - break; - case kFmtImm16: - value = ((operand & 0x0800) >> 11) << 26; - value |= ((operand & 0xf000) >> 12) << 16; - value |= ((operand & 0x0700) >> 8) << 12; - value |= operand & 0x0ff; - bits |= value; + } + case kFixupMovImmHST: { + // operands[1] should hold disp, [2] has add, [3] has tab_rec + LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])); + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); + // If tab_rec is null, this is a literal load. Use target + LIR* target = lir->target; + int32_t target_disp = tab_rec ? tab_rec->offset : target->offset; + lir->operands[1] = + ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff; break; - case kFmtOff24: { - uint32_t signbit = (operand >> 31) & 0x1; - uint32_t i1 = (operand >> 22) & 0x1; - uint32_t i2 = (operand >> 21) & 0x1; - uint32_t imm10 = (operand >> 11) & 0x03ff; - uint32_t imm11 = operand & 0x07ff; - uint32_t j1 = (i1 ^ signbit) ? 0 : 1; - uint32_t j2 = (i2 ^ signbit) ? 0 : 1; - value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | - imm11; - bits |= value; + } + case kFixupAlign4: { + int32_t required_size = lir->offset & 0x2; + if (lir->flags.size != required_size) { + offset_adjustment += required_size - lir->flags.size; + lir->flags.size = required_size; + res = kRetryAll; } break; + } default: - LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind; + LOG(FATAL) << "Unexpected case " << lir->flags.fixup; } + /* + * If one of the pc-relative instructions expanded we'll have + * to make another pass. Don't bother to fully assemble the + * instruction. + */ + if (res == kSuccess) { + EncodeLIR(lir); + if (assembler_retries == 0) { + // Go ahead and fix up the code buffer image. + for (int i = 0; i < lir->flags.size; i++) { + code_buffer_[lir->offset + i] = lir->u.a.bytes[i]; + } + } + } + prev_lir = lir; + lir = lir->u.a.pcrel_next; } - if (encoder->size == 4) { - code_buffer_.push_back((bits >> 16) & 0xff); - code_buffer_.push_back((bits >> 24) & 0xff); + + if (res == kSuccess) { + break; + } else { + assembler_retries++; + if (assembler_retries > MAX_ASSEMBLER_RETRIES) { + CodegenDump(); + LOG(FATAL) << "Assembler error - too many retries"; + } + starting_offset += offset_adjustment; + data_offset_ = (starting_offset + 0x3) & ~0x3; + AssignDataOffsets(); } - code_buffer_.push_back(bits & 0xff); - code_buffer_.push_back((bits >> 8) & 0xff); } - return res; + + // Rebuild the CodeBuffer if we had to retry; otherwise it should be good as-is. + if (assembler_retries != 0) { + code_buffer_.clear(); + for (LIR* lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { + if (lir->flags.is_nop) { + continue; + } else { + for (int i = 0; i < lir->flags.size; i++) { + code_buffer_.push_back(lir->u.a.bytes[i]); + } + } + } + } + + data_offset_ = (code_buffer_.size() + 0x3) & ~0x3; + + cu_->NewTimingSplit("LiteralData"); + // Install literals + InstallLiteralPools(); + + // Install switch tables + InstallSwitchTables(); + + // Install fill array data + InstallFillArrayData(); + + // Create the mapping table and native offset to reference map. 
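A note on the data_offset_ computations above: the recurring (x + 0x3) & ~0x3 idiom rounds a byte offset up to the next 4-byte boundary so that literal data starts word-aligned after the code. A tiny sketch:

#include <cstdint>
#include <cstdio>

// Round a byte offset up to the next word (4-byte) boundary.
uint32_t AlignUpToWord(uint32_t offset) {
  return (offset + 0x3u) & ~0x3u;
}

int main() {
  std::printf("%u %u %u\n", AlignUpToWord(8), AlignUpToWord(9), AlignUpToWord(11));
  // prints: 8 12 12
  return 0;
}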
+ cu_->NewTimingSplit("PcMappingTable"); + CreateMappingTables(); + + cu_->NewTimingSplit("GcMap"); + CreateNativeGcMap(); } int ArmMir2Lir::GetInsnSize(LIR* lir) { + DCHECK(!IsPseudoLirOp(lir->opcode)); return EncodingMap[lir->opcode].size; } +// Encode instruction bit pattern and assign offsets. +uint32_t ArmMir2Lir::EncodeRange(LIR* head_lir, LIR* tail_lir, uint32_t offset) { + LIR* end_lir = tail_lir->next; + + /* + * A significant percentage of methods can be assembled in a single pass. We'll + * go ahead and build the code image here, leaving holes for pc-relative fixup + * codes. If the code size changes during that pass, we'll have to throw away + * this work - but if not, we're ready to go. + */ + code_buffer_.reserve(estimated_native_code_size_ + 256); // Add a little slop. + LIR* last_fixup = NULL; + for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) { + lir->offset = offset; + if (!lir->flags.is_nop) { + if (lir->flags.fixup != kFixupNone) { + if (!IsPseudoLirOp(lir->opcode)) { + lir->flags.size = EncodingMap[lir->opcode].size; + lir->flags.fixup = EncodingMap[lir->opcode].fixup; + } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) { + lir->flags.size = (offset & 0x2); + lir->flags.fixup = kFixupAlign4; + } else { + lir->flags.size = 0; + lir->flags.fixup = kFixupLabel; + } + // Link into the fixup chain. + lir->flags.use_def_invalid = true; + lir->u.a.pcrel_next = NULL; + if (first_fixup_ == NULL) { + first_fixup_ = lir; + } else { + last_fixup->u.a.pcrel_next = lir; + } + last_fixup = lir; + } else { + EncodeLIR(lir); + } + for (int i = 0; i < lir->flags.size; i++) { + code_buffer_.push_back(lir->u.a.bytes[i]); + } + offset += lir->flags.size; + } + } + return offset; +} + +void ArmMir2Lir::AssignDataOffsets() { + /* Set up offsets for literals */ + CodeOffset offset = data_offset_; + + offset = AssignLiteralOffset(offset); + + offset = AssignSwitchTablesOffset(offset); + + total_size_ = AssignFillArrayDataOffset(offset); +} + } // namespace art diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index 2dbe5f5c36..51aca8540c 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -92,7 +92,7 @@ void ArmMir2Lir::LockLiveArgs(MIR* mir) { } /* Find the next MIR, which may be in a following basic block */ -// TODO: should this be a utility in mir_graph? +// TODO: make this a utility in mir_graph. MIR* ArmMir2Lir::GetNextMir(BasicBlock** p_bb, MIR* mir) { BasicBlock* bb = *p_bb; MIR* orig_mir = mir; @@ -103,7 +103,7 @@ MIR* ArmMir2Lir::GetNextMir(BasicBlock** p_bb, MIR* mir) { if (mir != NULL) { return mir; } else { - bb = bb->fall_through; + bb = mir_graph_->GetBasicBlock(bb->fall_through); *p_bb = bb; if (bb) { mir = bb->first_mir_insn; @@ -120,17 +120,18 @@ MIR* ArmMir2Lir::GetNextMir(BasicBlock** p_bb, MIR* mir) { // TODO: move to common code void ArmMir2Lir::GenPrintLabel(MIR* mir) { /* Mark the beginning of a Dalvik instruction for line tracking */ - char* inst_str = cu_->verbose ? 
- mir_graph_->GetDalvikDisassembly(mir) : NULL; - MarkBoundary(mir->offset, inst_str); + if (cu_->verbose) { + char* inst_str = mir_graph_->GetDalvikDisassembly(mir); + MarkBoundary(mir->offset, inst_str); + } } MIR* ArmMir2Lir::SpecialIGet(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object) { - int field_offset; + int32_t field_offset; bool is_volatile; uint32_t field_idx = mir->dalvikInsn.vC; - bool fast_path = FastInstance(field_idx, field_offset, is_volatile, false); + bool fast_path = FastInstance(field_idx, false, &field_offset, &is_volatile); if (!fast_path || !(mir->optimization_flags & MIR_IGNORE_NULL_CHECK)) { return NULL; } @@ -152,10 +153,10 @@ MIR* ArmMir2Lir::SpecialIGet(BasicBlock** bb, MIR* mir, MIR* ArmMir2Lir::SpecialIPut(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object) { - int field_offset; + int32_t field_offset; bool is_volatile; uint32_t field_idx = mir->dalvikInsn.vC; - bool fast_path = FastInstance(field_idx, field_offset, is_volatile, false); + bool fast_path = FastInstance(field_idx, false, &field_offset, &is_volatile); if (!fast_path || !(mir->optimization_flags & MIR_IGNORE_NULL_CHECK)) { return NULL; } @@ -319,9 +320,9 @@ void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; - int size = table[1]; + uint32_t size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), - ArenaAllocator::kAllocLIR)); + ArenaAllocator::kAllocLIR)); switch_tables_.Insert(tab_rec); // Get the switch value @@ -337,7 +338,7 @@ void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, r_key = tmp; } // Materialize a pointer to the switch table - NewLIR3(kThumb2Adr, rBase, 0, reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR3(kThumb2Adr, rBase, 0, WrapPointer(tab_rec)); // Set up r_idx int r_idx = AllocTemp(); LoadConstant(r_idx, size); @@ -367,7 +368,7 @@ void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; - int size = table[1]; + uint32_t size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), ArenaAllocator::kAllocLIR)); switch_tables_.Insert(tab_rec); @@ -376,7 +377,7 @@ void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, rl_src = LoadValue(rl_src, kCoreReg); int table_base = AllocTemp(); // Materialize a pointer to the switch table - NewLIR3(kThumb2Adr, table_base, 0, reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR3(kThumb2Adr, table_base, 0, WrapPointer(tab_rec)); int low_key = s4FromSwitchData(&table[2]); int keyReg; // Remove the bias, if necessary @@ -432,95 +433,127 @@ void ArmMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pHandleFillArrayData).Int32Value(), rARM_LR); // Materialize a pointer to the fill data image - NewLIR3(kThumb2Adr, r1, 0, reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR3(kThumb2Adr, r1, 0, WrapPointer(tab_rec)); ClobberCalleeSave(); LIR* call_inst = OpReg(kOpBlx, rARM_LR); MarkSafepointPC(call_inst); } /* - * Handle simple case (thin lock) inline. If it's complicated, bail - * out to the heavyweight lock/unlock routines. 
We'll use dedicated - * registers here in order to be in the right position in case we - * to bail to oat[Lock/Unlock]Object(self, object) - * - * r0 -> self pointer [arg0 for oat[Lock/Unlock]Object - * r1 -> object [arg1 for oat[Lock/Unlock]Object - * r2 -> intial contents of object->lock, later result of strex - * r3 -> self->thread_id - * r12 -> allow to be used by utilities as general temp - * - * The result of the strex is 0 if we acquire the lock. - * - * See comments in monitor.cc for the layout of the lock word. - * Of particular interest to this code is the test for the - * simple case - which we handle inline. For monitor enter, the - * simple case is thin lock, held by no-one. For monitor exit, - * the simple case is thin lock, held by the unlocking thread with - * a recurse count of 0. - * - * A minor complication is that there is a field in the lock word - * unrelated to locking: the hash state. This field must be ignored, but - * preserved. - * + * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more + * details see monitor.cc. */ void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { FlushAllRegs(); - DCHECK_EQ(LW_SHAPE_THIN, 0); LoadValueDirectFixed(rl_src, r0); // Get obj LockCallTemps(); // Prepare for explicit register usage - GenNullCheck(rl_src.s_reg_low, r0, opt_flags); - LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2); - NewLIR3(kThumb2Ldrex, r1, r0, - mirror::Object::MonitorOffset().Int32Value() >> 2); // Get object->lock - // Align owner - OpRegImm(kOpLsl, r2, LW_LOCK_OWNER_SHIFT); - // Is lock unheld on lock or held by us (==thread_id) on unlock? - NewLIR4(kThumb2Bfi, r2, r1, 0, LW_LOCK_OWNER_SHIFT - 1); - NewLIR3(kThumb2Bfc, r1, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1); - OpRegImm(kOpCmp, r1, 0); - OpIT(kCondEq, ""); - NewLIR4(kThumb2Strex, r1, r2, r0, - mirror::Object::MonitorOffset().Int32Value() >> 2); - OpRegImm(kOpCmp, r1, 0); - OpIT(kCondNe, "T"); - // Go expensive route - artLockObjectFromCode(self, obj); - LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR); - ClobberCalleeSave(); - LIR* call_inst = OpReg(kOpBlx, rARM_LR); - MarkSafepointPC(call_inst); - GenMemBarrier(kLoadLoad); + constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15. + if (kArchVariantHasGoodBranchPredictor) { + LIR* null_check_branch; + if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) { + null_check_branch = nullptr; // No null check. + } else { + // If the null-check fails its handled by the slow-path to reduce exception related meta-data. + null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL); + } + LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2); + NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2); + LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, r1, 0, NULL); + NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2); + LIR* lock_success_branch = OpCmpImmBranch(kCondEq, r1, 0, NULL); + + + LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); + not_unlocked_branch->target = slow_path_target; + if (null_check_branch != nullptr) { + null_check_branch->target = slow_path_target; + } + // TODO: move to a slow path. 
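For context on the fast path generated above: monitor enter is an ldrex/strex compare-and-swap on the object's lock word, i.e. load it exclusively, proceed only if it reads as unlocked (zero), then attempt to store the thread id, falling back to the runtime on contention. A rough C++ analogue of that logic, assuming a simplified lock word where 0 means unlocked (not ART's actual lock-word layout):

#include <atomic>
#include <cstdint>

// Illustrative thin-lock acquire; compare_exchange plays the role of the ldrex/strex pair.
// Returns false when the caller should fall back to the artLockObjectFromCode slow path.
bool ThinLockEnterFastPath(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
  uint32_t expected = 0;  // 0 == unlocked in this simplified model
  return lock_word.compare_exchange_strong(expected, thread_id,
                                           std::memory_order_acquire);
}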
+ // Go expensive route - artLockObjectFromCode(obj); + LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR); + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx, rARM_LR); + MarkSafepointPC(call_inst); + + LIR* success_target = NewLIR0(kPseudoTargetLabel); + lock_success_branch->target = success_target; + GenMemBarrier(kLoadLoad); + } else { + // Explicit null-check as slow-path is entered using an IT. + GenNullCheck(rl_src.s_reg_low, r0, opt_flags); + LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2); + NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2); + OpRegImm(kOpCmp, r1, 0); + OpIT(kCondEq, ""); + NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2); + OpRegImm(kOpCmp, r1, 0); + OpIT(kCondNe, "T"); + // Go expensive route - artLockObjectFromCode(self, obj); + LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR); + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR); + MarkSafepointPC(call_inst); + GenMemBarrier(kLoadLoad); + } } /* - * For monitor unlock, we don't have to use ldrex/strex. Once - * we've determined that the lock is thin and that we own it with - * a zero recursion count, it's safe to punch it back to the - * initial, unlock thin state with a store word. + * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more + * details see monitor.cc. Note the code below doesn't use ldrex/strex as the code holds the lock + * and can only give away ownership if its suspended. */ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { - DCHECK_EQ(LW_SHAPE_THIN, 0); FlushAllRegs(); LoadValueDirectFixed(rl_src, r0); // Get obj LockCallTemps(); // Prepare for explicit register usage - GenNullCheck(rl_src.s_reg_low, r0, opt_flags); - LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1); // Get lock + LIR* null_check_branch; LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2); - // Is lock unheld on lock or held by us (==thread_id) on unlock? - OpRegRegImm(kOpAnd, r3, r1, - (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT)); - // Align owner - OpRegImm(kOpLsl, r2, LW_LOCK_OWNER_SHIFT); - NewLIR3(kThumb2Bfc, r1, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1); - OpRegReg(kOpSub, r1, r2); - OpIT(kCondEq, "EE"); - StoreWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r3); - // Go expensive route - UnlockObjectFromCode(obj); - LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR); - ClobberCalleeSave(); - LIR* call_inst = OpReg(kOpBlx, rARM_LR); - MarkSafepointPC(call_inst); - GenMemBarrier(kStoreLoad); + constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15. + if (kArchVariantHasGoodBranchPredictor) { + if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) { + null_check_branch = nullptr; // No null check. + } else { + // If the null-check fails its handled by the slow-path to reduce exception related meta-data. 
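The unlock fast path being generated here is deliberately simpler than the enter path: because only the owning thread can release the lock, and ownership can only be taken away while the thread is suspended, a plain compare against the thread id followed by a store of zero suffices, with no ldrex/strex. A hedged C++ sketch under the same simplified lock-word model as above:

#include <atomic>
#include <cstdint>

// Illustrative thin-lock release; returns false when the slow path
// (artUnlockObjectFromCode) should be taken instead.
bool ThinLockExitFastPath(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
  if (lock_word.load(std::memory_order_relaxed) != thread_id) {
    return false;  // not a thin lock held by this thread
  }
  lock_word.store(0, std::memory_order_release);  // back to unlocked
  return true;
}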
+ null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL); + } + LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1); + LoadConstantNoClobber(r3, 0); + LIR* slow_unlock_branch = OpCmpBranch(kCondNe, r1, r2, NULL); + StoreWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r3); + LIR* unlock_success_branch = OpUnconditionalBranch(NULL); + + LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); + slow_unlock_branch->target = slow_path_target; + if (null_check_branch != nullptr) { + null_check_branch->target = slow_path_target; + } + // TODO: move to a slow path. + // Go expensive route - artUnlockObjectFromCode(obj); + LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR); + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx, rARM_LR); + MarkSafepointPC(call_inst); + + LIR* success_target = NewLIR0(kPseudoTargetLabel); + unlock_success_branch->target = success_target; + GenMemBarrier(kStoreLoad); + } else { + // Explicit null-check as slow-path is entered using an IT. + GenNullCheck(rl_src.s_reg_low, r0, opt_flags); + LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1); // Get lock + LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2); + LoadConstantNoClobber(r3, 0); + // Is lock unheld on lock or held by us (==thread_id) on unlock? + OpRegReg(kOpCmp, r1, r2); + OpIT(kCondEq, "EE"); + StoreWordDisp/*eq*/(r0, mirror::Object::MonitorOffset().Int32Value(), r3); + // Go expensive route - UnlockObjectFromCode(obj); + LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR); + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR); + MarkSafepointPC(call_inst); + GenMemBarrier(kStoreLoad); + } } void ArmMir2Lir::GenMoveException(RegLocation rl_dest) { diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 291319f258..15355be9d7 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -51,7 +51,6 @@ class ArmMir2Lir : public Mir2Lir { int AllocTypedTempPair(bool fp_hint, int reg_class); int S2d(int low_reg, int high_reg); int TargetReg(SpecialTargetRegister reg); - RegisterInfo* GetRegInfo(int reg); RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); @@ -71,9 +70,13 @@ class ArmMir2Lir : public Mir2Lir { void CompilerInitializeRegAlloc(); // Required for target - miscellaneous. - AssemblerStatus AssembleInstructions(uintptr_t start_addr); + void AssembleLIR(); + uint32_t EncodeRange(LIR* head_lir, LIR* tail_lir, uint32_t starting_offset); + int AssignInsnOffsets(); + void AssignOffsets(); + void EncodeLIR(LIR* lir); void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir); + void SetupTargetResourceMasks(LIR* lir, uint64_t flags); const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); @@ -85,12 +88,10 @@ class ArmMir2Lir : public Mir2Lir { // Required for target - Dalvik-level generators. 
void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); - void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index, - RegLocation rl_src, int scale); void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale); - void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale); + void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, + RegLocation rl_src, int scale, bool card_mark); void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift); void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -106,6 +107,8 @@ class ArmMir2Lir : public Mir2Lir { bool GenInlinedCas32(CallInfo* info, bool need_write_barrier); bool GenInlinedMinMaxInt(CallInfo* info, bool is_min); bool GenInlinedSqrt(CallInfo* info); + bool GenInlinedPeek(CallInfo* info, OpSize size); + bool GenInlinedPoke(CallInfo* info, OpSize size); void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -118,7 +121,7 @@ class ArmMir2Lir : public Mir2Lir { void GenDivZeroCheck(int reg_lo, int reg_hi); void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); void GenExitSequence(); - void GenFillArrayData(uint32_t table_offset, RegLocation rl_src); + void GenFillArrayData(DexOffset table_offset, RegLocation rl_src); void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); void GenSelect(BasicBlock* bb, MIR* mir); @@ -130,8 +133,8 @@ class ArmMir2Lir : public Mir2Lir { int first_bit, int second_bit); void GenNegDouble(RegLocation rl_dest, RegLocation rl_src); void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); - void GenPackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); - void GenSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); + void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); void GenSpecialCase(BasicBlock* bb, MIR* mir, SpecialCaseHandler special_case); // Required for target - single operation generators. 
@@ -188,6 +191,9 @@ class ArmMir2Lir : public Mir2Lir { MIR* SpecialIdentity(MIR* mir); LIR* LoadFPConstantValue(int r_dest, int value); bool BadOverlap(RegLocation rl_src, RegLocation rl_dest); + void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); + void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); + void AssignDataOffsets(); }; } // namespace art diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc index 08d6778129..480e0218d5 100644 --- a/compiler/dex/quick/arm/fp_arm.cc +++ b/compiler/dex/quick/arm/fp_arm.cc @@ -176,7 +176,7 @@ void ArmMir2Lir::GenConversion(Instruction::Code opcode, void ArmMir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) { - LIR* target = &block_label_list_[bb->taken->id]; + LIR* target = &block_label_list_[bb->taken]; RegLocation rl_src1; RegLocation rl_src2; if (is_double) { diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 6fbdd2fd49..42bf3d4d00 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -24,8 +24,7 @@ namespace art { -LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1, - int src2, LIR* target) { +LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target) { OpRegReg(kOpCmp, src1, src2); return OpCondBranch(cond, target); } @@ -123,8 +122,8 @@ void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int32_t val_hi = High32Bits(val); DCHECK_GE(ModifiedImmediate(val_lo), 0); DCHECK_GE(ModifiedImmediate(val_hi), 0); - LIR* taken = &block_label_list_[bb->taken->id]; - LIR* not_taken = &block_label_list_[bb->fall_through->id]; + LIR* taken = &block_label_list_[bb->taken]; + LIR* not_taken = &block_label_list_[bb->fall_through]; rl_src1 = LoadValueWide(rl_src1, kCoreReg); int32_t low_reg = rl_src1.low_reg; int32_t high_reg = rl_src1.high_reg; @@ -179,23 +178,6 @@ void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { RegLocation rl_result; RegLocation rl_src = mir_graph_->GetSrc(mir, 0); - // Temporary debugging code - int dest_sreg = mir->ssa_rep->defs[0]; - if ((dest_sreg < 0) || (dest_sreg >= mir_graph_->GetNumSSARegs())) { - LOG(INFO) << "Bad target sreg: " << dest_sreg << ", in " - << PrettyMethod(cu_->method_idx, *cu_->dex_file); - LOG(INFO) << "at dex offset 0x" << std::hex << mir->offset; - LOG(INFO) << "vreg = " << mir_graph_->SRegToVReg(dest_sreg); - LOG(INFO) << "num uses = " << mir->ssa_rep->num_uses; - if (mir->ssa_rep->num_uses == 1) { - LOG(INFO) << "CONST case, vals = " << mir->dalvikInsn.vB << ", " << mir->dalvikInsn.vC; - } else { - LOG(INFO) << "MOVE case, operands = " << mir->ssa_rep->uses[1] << ", " - << mir->ssa_rep->uses[2]; - } - CHECK(false) << "Invalid target sreg on Select."; - } - // End temporary debugging code RegLocation rl_dest = mir_graph_->GetDest(mir); rl_src = LoadValue(rl_src, kCoreReg); if (mir->ssa_rep->num_uses == 1) { @@ -234,11 +216,17 @@ void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { rl_false = LoadValue(rl_false, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegImm(kOpCmp, rl_src.low_reg, 0); - OpIT(kCondEq, "E"); - LIR* l1 = OpRegCopy(rl_result.low_reg, rl_true.low_reg); - l1->flags.is_nop = false; // Make sure this instruction isn't optimized away - LIR* l2 = OpRegCopy(rl_result.low_reg, rl_false.low_reg); - l2->flags.is_nop = false; // Make sure this instruction isn't optimized away + if 
(rl_result.low_reg == rl_true.low_reg) { // Is the "true" case already in place? + OpIT(kCondNe, ""); + OpRegCopy(rl_result.low_reg, rl_false.low_reg); + } else if (rl_result.low_reg == rl_false.low_reg) { // False case in place? + OpIT(kCondEq, ""); + OpRegCopy(rl_result.low_reg, rl_true.low_reg); + } else { // Normal - select between the two. + OpIT(kCondEq, "E"); + OpRegCopy(rl_result.low_reg, rl_true.low_reg); + OpRegCopy(rl_result.low_reg, rl_false.low_reg); + } GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact } StoreValue(rl_dest, rl_result); @@ -265,8 +253,8 @@ void ArmMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { return; } } - LIR* taken = &block_label_list_[bb->taken->id]; - LIR* not_taken = &block_label_list_[bb->fall_through->id]; + LIR* taken = &block_label_list_[bb->taken]; + LIR* not_taken = &block_label_list_[bb->fall_through]; rl_src1 = LoadValueWide(rl_src1, kCoreReg); rl_src2 = LoadValueWide(rl_src2, kCoreReg); OpRegReg(kOpCmp, rl_src1.high_reg, rl_src2.high_reg); @@ -313,7 +301,18 @@ LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, int check_value, LIR* branch; int mod_imm; ArmConditionCode arm_cond = ArmConditionEncoding(cond); - if ((ARM_LOWREG(reg)) && (check_value == 0) && + /* + * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit + * compare-and-branch if zero is ideal if it will reach. However, because null checks + * branch forward to a launch pad, they will frequently not reach - and thus have to + * be converted to a long form during assembly (which will trigger another assembly + * pass). Here we estimate the branch distance for checks, and if large directly + * generate the long form in an attempt to avoid an extra assembly pass. + * TODO: consider interspersing launchpads in code following unconditional branches. + */ + bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget)); + skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64); + if (!skip && (ARM_LOWREG(reg)) && (check_value == 0) && ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) { branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz, reg, 0); @@ -467,14 +466,39 @@ LIR* ArmMir2Lir::GenRegMemCheck(ConditionCode c_code, RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, int reg1, int lit, bool is_div) { - LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm"; - return rl_dest; + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + + // Put the literal in a temp. + int lit_temp = AllocTemp(); + LoadConstant(lit_temp, lit); + // Use the generic case for div/rem with arg2 in a register. + // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure. + rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div); + FreeTemp(lit_temp); + + return rl_result; } RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, int reg1, int reg2, bool is_div) { - LOG(FATAL) << "Unexpected use of GenDivRem for Arm"; - return rl_dest; + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (is_div) { + // Simple case, use sdiv instruction. 
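The remainder expansion in the GenDivRem code that follows uses only sdiv, mul, and sub, since there is no hardware remainder instruction: temp = reg1 / reg2, temp = temp * reg2, dest = reg1 - temp. A tiny sketch of the same arithmetic:

#include <cstdint>
#include <cstdio>

// Remainder built from sdiv/mul/sub, mirroring the expansion below.
int32_t RemViaSdiv(int32_t reg1, int32_t reg2) {
  int32_t temp = reg1 / reg2;  // sdiv
  temp = temp * reg2;          // mul
  return reg1 - temp;          // sub
}

int main() {
  std::printf("%d\n", RemViaSdiv(17, 5));  // prints 2
  return 0;
}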
+ OpRegRegReg(kOpDiv, rl_result.low_reg, reg1, reg2); + } else { + // Remainder case, use the following code: + // temp = reg1 / reg2 - integer division + // temp = temp * reg2 + // dest = reg1 - temp + + int temp = AllocTemp(); + OpRegRegReg(kOpDiv, temp, reg1, reg2); + OpRegReg(kOpMul, temp, reg2); + OpRegRegReg(kOpSub, rl_result.low_reg, reg1, temp); + FreeTemp(temp); + } + + return rl_result; } bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { @@ -494,6 +518,50 @@ bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { return true; } +bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address.wide = 0; // ignore high half in info->args[1] + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (size == kLong) { + // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0. + if (rl_address.low_reg != rl_result.low_reg) { + LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, kWord, INVALID_SREG); + LoadBaseDisp(rl_address.low_reg, 4, rl_result.high_reg, kWord, INVALID_SREG); + } else { + LoadBaseDisp(rl_address.low_reg, 4, rl_result.high_reg, kWord, INVALID_SREG); + LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, kWord, INVALID_SREG); + } + StoreValueWide(rl_dest, rl_result); + } else { + DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); + // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0. + LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, size, INVALID_SREG); + StoreValue(rl_dest, rl_result); + } + return true; +} + +bool ArmMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address.wide = 0; // ignore high half in info->args[1] + RegLocation rl_src_value = info->args[2]; // [size] value + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + if (size == kLong) { + // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0. + RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg); + StoreBaseDisp(rl_address.low_reg, 0, rl_value.low_reg, kWord); + StoreBaseDisp(rl_address.low_reg, 4, rl_value.high_reg, kWord); + } else { + DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); + // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0. 
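The peek/poke intrinsics here rely on ARMv7 tolerating unaligned LDR/STR (with SCTLR.A clear), so a 64-bit access is faked as two 32-bit word accesses rather than an LDRD/STRD. A hedged host-side sketch of the same idea, where memcpy stands in for the unaligned loads (the compiler of course emits the LDR pair directly):

#include <cstdint>
#include <cstring>

// Assemble a possibly-unaligned 64-bit value from two 32-bit word reads,
// mirroring the LDR [addr, #0] / LDR [addr, #4] pair used by GenInlinedPeek.
int64_t PeekLongUnaligned(const uint8_t* addr) {
  uint32_t lo;
  uint32_t hi;
  std::memcpy(&lo, addr, sizeof(lo));
  std::memcpy(&hi, addr + 4, sizeof(hi));
  return (static_cast<int64_t>(hi) << 32) | lo;
}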
+ RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); + StoreBaseDisp(rl_address.low_reg, 0, rl_value.low_reg, size); + } + return true; +} + void ArmMir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) { LOG(FATAL) << "Unexpected use of OpLea for Arm"; } @@ -618,7 +686,7 @@ void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { break; } LIR* dmb = NewLIR1(kThumb2Dmb, dmb_flavor); - dmb->def_mask = ENCODE_ALL; + dmb->u.m.def_mask = ENCODE_ALL; #endif } @@ -755,7 +823,7 @@ void ArmMir2Lir::GenXorLong(RegLocation rl_dest, RegLocation rl_src1, * Generate array load */ void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_dest, int scale) { + RegLocation rl_index, RegLocation rl_dest, int scale) { RegisterClass reg_class = oat_reg_class_by_size(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); int data_offset; @@ -845,13 +913,13 @@ void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, * */ void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) { + RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { RegisterClass reg_class = oat_reg_class_by_size(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); - int data_offset; bool constant_index = rl_index.is_const; - if (rl_src.wide) { + int data_offset; + if (size == kLong || size == kDouble) { data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); } else { data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); @@ -868,12 +936,14 @@ void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, } int reg_ptr; + bool allocated_reg_ptr_temp = false; if (constant_index) { reg_ptr = rl_array.low_reg; - } else if (IsTemp(rl_array.low_reg)) { + } else if (IsTemp(rl_array.low_reg) && !card_mark) { Clobber(rl_array.low_reg); reg_ptr = rl_array.low_reg; } else { + allocated_reg_ptr_temp = true; reg_ptr = AllocTemp(); } @@ -924,71 +994,15 @@ void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, StoreBaseIndexed(reg_ptr, rl_index.low_reg, rl_src.low_reg, scale, size); } - if (!constant_index) { + if (allocated_reg_ptr_temp) { FreeTemp(reg_ptr); } -} - -/* - * Generate array store - * - */ -void ArmMir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) { - int len_offset = mirror::Array::LengthOffset().Int32Value(); - int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(); - - FlushAllRegs(); // Use explicit registers - LockCallTemps(); - - int r_value = TargetReg(kArg0); // Register holding value - int r_array_class = TargetReg(kArg1); // Register holding array's Class - int r_array = TargetReg(kArg2); // Register holding array - int r_index = TargetReg(kArg3); // Register holding index into array - - LoadValueDirectFixed(rl_array, r_array); // Grab array - LoadValueDirectFixed(rl_src, r_value); // Grab value - LoadValueDirectFixed(rl_index, r_index); // Grab index - - GenNullCheck(rl_array.s_reg_low, r_array, opt_flags); // NPE? - - // Store of null? - LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL); - - // Get the array's class. 
- LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class); - CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCanPutArrayElement), r_value, - r_array_class, true); - // Redo LoadValues in case they didn't survive the call. - LoadValueDirectFixed(rl_array, r_array); // Reload array - LoadValueDirectFixed(rl_index, r_index); // Reload index - LoadValueDirectFixed(rl_src, r_value); // Reload value - r_array_class = INVALID_REG; - - // Branch here if value to be stored == null - LIR* target = NewLIR0(kPseudoTargetLabel); - null_value_check->target = target; - - bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); - int reg_len = INVALID_REG; - if (needs_range_check) { - reg_len = TargetReg(kArg1); - LoadWordDisp(r_array, len_offset, reg_len); // Get len - } - /* r_ptr -> array data */ - int r_ptr = AllocTemp(); - OpRegRegImm(kOpAdd, r_ptr, r_array, data_offset); - if (needs_range_check) { - GenRegRegCheck(kCondCs, r_index, reg_len, kThrowArrayBounds); - } - StoreBaseIndexed(r_ptr, r_index, r_value, scale, kWord); - FreeTemp(r_ptr); - FreeTemp(r_index); - if (!mir_graph_->IsConstantNullRef(rl_src)) { - MarkGCCard(r_value, r_array); + if (card_mark) { + MarkGCCard(rl_src.low_reg, rl_array.low_reg); } } + void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) { rl_src = LoadValueWide(rl_src, kCoreReg); diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 6cc3052da1..52aba9b4df 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -74,6 +74,8 @@ int ArmMir2Lir::TargetReg(SpecialTargetRegister reg) { case kRet0: res = rARM_RET0; break; case kRet1: res = rARM_RET1; break; case kInvokeTgt: res = rARM_INVOKE_TGT; break; + case kHiddenArg: res = r12; break; + case kHiddenFpArg: res = INVALID_REG; break; case kCount: res = rARM_COUNT; break; } return res; @@ -118,78 +120,83 @@ uint64_t ArmMir2Lir::GetPCUseDefEncoding() { return ENCODE_ARM_REG_PC; } -void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir) { +// Thumb2 specific setup. TODO: inline?: +void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { DCHECK_EQ(cu_->instruction_set, kThumb2); + DCHECK(!lir->flags.use_def_invalid); - // Thumb2 specific setup - uint64_t flags = ArmMir2Lir::EncodingMap[lir->opcode].flags; int opcode = lir->opcode; - if (flags & REG_DEF_SP) { - lir->def_mask |= ENCODE_ARM_REG_SP; - } + // These flags are somewhat uncommon - bypass if we can. 
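The def/use masks being rewritten here (now lir->u.m.def_mask / lir->u.m.use_mask) are 64-bit resource bitmaps that feed the local optimizer's safety checks for moving or eliminating LIRs; that is why an IT block or a memory barrier conservatively sets def_mask to ENCODE_ALL. A standalone sketch of the standard dependence test such masks support, using placeholder bit assignments rather than ART's actual resource encodings:

    #include <cassert>
    #include <cstdint>

    // Two instructions may be swapped only if neither defines a resource the
    // other uses or defines.
    bool CanSwap(uint64_t def_a, uint64_t use_a, uint64_t def_b, uint64_t use_b) {
      return ((def_a & use_b) | (use_a & def_b) | (def_a & def_b)) == 0;
    }

    int main() {
      const uint64_t kRegR0 = 1ull << 0, kRegSP = 1ull << 13;  // placeholder bits
      // "add r0, ..." followed by "str ..., [sp, ...]": independent, may be reordered.
      assert(CanSwap(kRegR0, 0, 0, kRegSP));
      // Nothing may move across a barrier whose def_mask is ENCODE_ALL (all bits set).
      assert(!CanSwap(~0ull, 0, 0, kRegSP));
      return 0;
    }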
+ if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 | REG_DEF_LIST1 | + REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 | + REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) { + if (flags & REG_DEF_SP) { + lir->u.m.def_mask |= ENCODE_ARM_REG_SP; + } - if (flags & REG_USE_SP) { - lir->use_mask |= ENCODE_ARM_REG_SP; - } + if (flags & REG_USE_SP) { + lir->u.m.use_mask |= ENCODE_ARM_REG_SP; + } - if (flags & REG_DEF_LIST0) { - lir->def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); - } + if (flags & REG_DEF_LIST0) { + lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); + } - if (flags & REG_DEF_LIST1) { - lir->def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); - } + if (flags & REG_DEF_LIST1) { + lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); + } - if (flags & REG_DEF_FPCS_LIST0) { - lir->def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); - } + if (flags & REG_DEF_FPCS_LIST0) { + lir->u.m.def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); + } - if (flags & REG_DEF_FPCS_LIST2) { - for (int i = 0; i < lir->operands[2]; i++) { - SetupRegMask(&lir->def_mask, lir->operands[1] + i); + if (flags & REG_DEF_FPCS_LIST2) { + for (int i = 0; i < lir->operands[2]; i++) { + SetupRegMask(&lir->u.m.def_mask, lir->operands[1] + i); + } } - } - if (flags & REG_USE_PC) { - lir->use_mask |= ENCODE_ARM_REG_PC; - } + if (flags & REG_USE_PC) { + lir->u.m.use_mask |= ENCODE_ARM_REG_PC; + } - /* Conservatively treat the IT block */ - if (flags & IS_IT) { - lir->def_mask = ENCODE_ALL; - } + /* Conservatively treat the IT block */ + if (flags & IS_IT) { + lir->u.m.def_mask = ENCODE_ALL; + } - if (flags & REG_USE_LIST0) { - lir->use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); - } + if (flags & REG_USE_LIST0) { + lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); + } - if (flags & REG_USE_LIST1) { - lir->use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); - } + if (flags & REG_USE_LIST1) { + lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); + } - if (flags & REG_USE_FPCS_LIST0) { - lir->use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); - } + if (flags & REG_USE_FPCS_LIST0) { + lir->u.m.use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); + } - if (flags & REG_USE_FPCS_LIST2) { - for (int i = 0; i < lir->operands[2]; i++) { - SetupRegMask(&lir->use_mask, lir->operands[1] + i); + if (flags & REG_USE_FPCS_LIST2) { + for (int i = 0; i < lir->operands[2]; i++) { + SetupRegMask(&lir->u.m.use_mask, lir->operands[1] + i); + } } - } - /* Fixup for kThumbPush/lr and kThumbPop/pc */ - if (opcode == kThumbPush || opcode == kThumbPop) { - uint64_t r8Mask = GetRegMaskCommon(r8); - if ((opcode == kThumbPush) && (lir->use_mask & r8Mask)) { - lir->use_mask &= ~r8Mask; - lir->use_mask |= ENCODE_ARM_REG_LR; - } else if ((opcode == kThumbPop) && (lir->def_mask & r8Mask)) { - lir->def_mask &= ~r8Mask; - lir->def_mask |= ENCODE_ARM_REG_PC; + /* Fixup for kThumbPush/lr and kThumbPop/pc */ + if (opcode == kThumbPush || opcode == kThumbPop) { + uint64_t r8Mask = GetRegMaskCommon(r8); + if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) { + lir->u.m.use_mask &= ~r8Mask; + lir->u.m.use_mask |= ENCODE_ARM_REG_LR; + } else if ((opcode == kThumbPop) && (lir->u.m.def_mask & r8Mask)) { + lir->u.m.def_mask &= ~r8Mask; + lir->u.m.def_mask |= ENCODE_ARM_REG_PC; + } + } + if (flags & REG_DEF_LR) { + lir->u.m.def_mask |= ENCODE_ARM_REG_LR; } - } - if (flags & REG_DEF_LR) { - lir->def_mask |= ENCODE_ARM_REG_LR; } } @@ -277,8 +284,8 
@@ static char* DecodeFPCSRegList(int count, int base, char* buf) { return buf; } -static int ExpandImmediate(int value) { - int mode = (value & 0xf00) >> 8; +static int32_t ExpandImmediate(int value) { + int32_t mode = (value & 0xf00) >> 8; uint32_t bits = value & 0xff; switch (mode) { case 0: @@ -466,8 +473,8 @@ void ArmMir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* prefi /* Memory bits */ if (arm_lir && (mask & ENCODE_DALVIK_REG)) { - sprintf(buf + strlen(buf), "dr%d%s", arm_lir->alias_info & 0xffff, - (arm_lir->alias_info & 0x80000000) ? "(+1)" : ""); + sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info), + DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : ""); } if (mask & ENCODE_LITERAL) { strcat(buf, "lit "); @@ -691,11 +698,6 @@ RegLocation ArmMir2Lir::GetReturnAlt() { return res; } -ArmMir2Lir::RegisterInfo* ArmMir2Lir::GetRegInfo(int reg) { - return ARM_FPREG(reg) ? ®_pool_->FPRegs[reg & ARM_FP_REG_MASK] - : ®_pool_->core_regs[reg]; -} - /* To be used when explicitly managing register use */ void ArmMir2Lir::LockCallTemps() { LockTemp(r0); @@ -718,14 +720,17 @@ int ArmMir2Lir::LoadHelper(ThreadOffset offset) { } uint64_t ArmMir2Lir::GetTargetInstFlags(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return ArmMir2Lir::EncodingMap[opcode].flags; } const char* ArmMir2Lir::GetTargetInstName(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return ArmMir2Lir::EncodingMap[opcode].name; } const char* ArmMir2Lir::GetTargetInstFmt(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return ArmMir2Lir::EncodingMap[opcode].fmt; } diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index c63de69284..d631cf7047 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -22,14 +22,14 @@ namespace art { /* This file contains codegen for the Thumb ISA. */ -static int EncodeImmSingle(int value) { - int res; - int bit_a = (value & 0x80000000) >> 31; - int not_bit_b = (value & 0x40000000) >> 30; - int bit_b = (value & 0x20000000) >> 29; - int b_smear = (value & 0x3e000000) >> 25; - int slice = (value & 0x01f80000) >> 19; - int zeroes = (value & 0x0007ffff); +static int32_t EncodeImmSingle(int32_t value) { + int32_t res; + int32_t bit_a = (value & 0x80000000) >> 31; + int32_t not_bit_b = (value & 0x40000000) >> 30; + int32_t bit_b = (value & 0x20000000) >> 29; + int32_t b_smear = (value & 0x3e000000) >> 25; + int32_t slice = (value & 0x01f80000) >> 19; + int32_t zeroes = (value & 0x0007ffff); if (zeroes != 0) return -1; if (bit_b) { @@ -47,15 +47,15 @@ static int EncodeImmSingle(int value) { * Determine whether value can be encoded as a Thumb2 floating point * immediate. If not, return -1. If so return encoded 8-bit value. 
*/ -static int EncodeImmDouble(int64_t value) { - int res; - int bit_a = (value & 0x8000000000000000ll) >> 63; - int not_bit_b = (value & 0x4000000000000000ll) >> 62; - int bit_b = (value & 0x2000000000000000ll) >> 61; - int b_smear = (value & 0x3fc0000000000000ll) >> 54; - int slice = (value & 0x003f000000000000ll) >> 48; +static int32_t EncodeImmDouble(int64_t value) { + int32_t res; + int32_t bit_a = (value & 0x8000000000000000ll) >> 63; + int32_t not_bit_b = (value & 0x4000000000000000ll) >> 62; + int32_t bit_b = (value & 0x2000000000000000ll) >> 61; + int32_t b_smear = (value & 0x3fc0000000000000ll) >> 54; + int32_t slice = (value & 0x003f000000000000ll) >> 48; uint64_t zeroes = (value & 0x0000ffffffffffffll); - if (zeroes != 0) + if (zeroes != 0ull) return -1; if (bit_b) { if ((not_bit_b != 0) || (b_smear != 0xff)) @@ -90,15 +90,14 @@ LIR* ArmMir2Lir::LoadFPConstantValue(int r_dest, int value) { LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs, r_dest, r15pc, 0, 0, 0, data_target); SetMemRefType(load_pc_rel, true, kLiteral); - load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target); AppendLIR(load_pc_rel); return load_pc_rel; } static int LeadingZeros(uint32_t val) { uint32_t alt; - int n; - int count; + int32_t n; + int32_t count; count = 16; n = 32; @@ -118,8 +117,8 @@ static int LeadingZeros(uint32_t val) { * immediate. If not, return -1. If so, return i:imm3:a:bcdefgh form. */ int ArmMir2Lir::ModifiedImmediate(uint32_t value) { - int z_leading; - int z_trailing; + int32_t z_leading; + int32_t z_trailing; uint32_t b0 = value & 0xff; /* Note: case of value==0 must use 0:000:0:0000000 encoding */ @@ -315,6 +314,22 @@ LIR* ArmMir2Lir::OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, case kOpSub: opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR; break; + case kOpRev: + DCHECK_EQ(shift, 0); + if (!thumb_form) { + // Binary, but rm is encoded twice. + return NewLIR3(kThumb2RevRR, r_dest_src1, r_src2, r_src2); + } + opcode = kThumbRev; + break; + case kOpRevsh: + DCHECK_EQ(shift, 0); + if (!thumb_form) { + // Binary, but rm is encoded twice. + return NewLIR3(kThumb2RevshRR, r_dest_src1, r_src2, r_src2); + } + opcode = kThumbRevsh; + break; case kOp2Byte: DCHECK_EQ(shift, 0); return NewLIR4(kThumb2Sbfx, r_dest_src1, r_src2, 0, 8); @@ -328,7 +343,7 @@ LIR* ArmMir2Lir::OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, LOG(FATAL) << "Bad opcode: " << op; break; } - DCHECK_GE(static_cast<int>(opcode), 0); + DCHECK(!IsPseudoLirOp(opcode)); if (EncodingMap[opcode].flags & IS_BINARY_OP) { return NewLIR2(opcode, r_dest_src1, r_src2); } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { @@ -380,6 +395,10 @@ LIR* ArmMir2Lir::OpRegRegRegShift(OpKind op, int r_dest, int r_src1, DCHECK_EQ(shift, 0); opcode = kThumb2MulRRR; break; + case kOpDiv: + DCHECK_EQ(shift, 0); + opcode = kThumb2SdivRRR; + break; case kOpOr: opcode = kThumb2OrrRRR; break; @@ -406,7 +425,7 @@ LIR* ArmMir2Lir::OpRegRegRegShift(OpKind op, int r_dest, int r_src1, LOG(FATAL) << "Bad opcode: " << op; break; } - DCHECK_GE(static_cast<int>(opcode), 0); + DCHECK(!IsPseudoLirOp(opcode)); if (EncodingMap[opcode].flags & IS_QUAD_OP) { return NewLIR4(opcode, r_dest, r_src1, r_src2, shift); } else { @@ -422,12 +441,12 @@ LIR* ArmMir2Lir::OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2) { LIR* ArmMir2Lir::OpRegRegImm(OpKind op, int r_dest, int r_src1, int value) { LIR* res; bool neg = (value < 0); - int abs_value = (neg) ? -value : value; + int32_t abs_value = (neg) ? 
-value : value; ArmOpcode opcode = kThumbBkpt; ArmOpcode alt_opcode = kThumbBkpt; bool all_low_regs = (ARM_LOWREG(r_dest) && ARM_LOWREG(r_src1)); - int mod_imm = ModifiedImmediate(value); - int mod_imm_neg = ModifiedImmediate(-value); + int32_t mod_imm = ModifiedImmediate(value); + int32_t mod_imm_neg = ModifiedImmediate(-value); switch (op) { case kOpLsl: @@ -545,7 +564,7 @@ LIR* ArmMir2Lir::OpRegRegImm(OpKind op, int r_dest, int r_src1, int value) { /* Handle Thumb-only variants here - otherwise punt to OpRegRegImm */ LIR* ArmMir2Lir::OpRegImm(OpKind op, int r_dest_src1, int value) { bool neg = (value < 0); - int abs_value = (neg) ? -value : value; + int32_t abs_value = (neg) ? -value : value; bool short_form = (((abs_value & 0xff) == abs_value) && ARM_LOWREG(r_dest_src1)); ArmOpcode opcode = kThumbBkpt; switch (op) { @@ -626,7 +645,6 @@ LIR* ArmMir2Lir::LoadConstantWide(int r_dest_lo, int r_dest_hi, int64_t value) { r_dest_lo, r_dest_hi, r15pc, 0, 0, data_target); } SetMemRefType(res, true, kLiteral); - res->alias_info = reinterpret_cast<uintptr_t>(data_target); AppendLIR(res); } return res; diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index a49fa7b44d..dfbc887299 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -45,29 +45,54 @@ bool Mir2Lir::IsInexpensiveConstant(RegLocation rl_src) { } void Mir2Lir::MarkSafepointPC(LIR* inst) { - inst->def_mask = ENCODE_ALL; + DCHECK(!inst->flags.use_def_invalid); + inst->u.m.def_mask = ENCODE_ALL; LIR* safepoint_pc = NewLIR0(kPseudoSafepointPC); - DCHECK_EQ(safepoint_pc->def_mask, ENCODE_ALL); + DCHECK_EQ(safepoint_pc->u.m.def_mask, ENCODE_ALL); } -bool Mir2Lir::FastInstance(uint32_t field_idx, int& field_offset, bool& is_volatile, bool is_put) { +bool Mir2Lir::FastInstance(uint32_t field_idx, bool is_put, int* field_offset, bool* is_volatile) { return cu_->compiler_driver->ComputeInstanceFieldInfo( - field_idx, mir_graph_->GetCurrentDexCompilationUnit(), field_offset, is_volatile, is_put); + field_idx, mir_graph_->GetCurrentDexCompilationUnit(), is_put, field_offset, is_volatile); +} + +/* Remove a LIR from the list. */ +void Mir2Lir::UnlinkLIR(LIR* lir) { + if (UNLIKELY(lir == first_lir_insn_)) { + first_lir_insn_ = lir->next; + if (lir->next != NULL) { + lir->next->prev = NULL; + } else { + DCHECK(lir->next == NULL); + DCHECK(lir == last_lir_insn_); + last_lir_insn_ = NULL; + } + } else if (lir == last_lir_insn_) { + last_lir_insn_ = lir->prev; + lir->prev->next = NULL; + } else if ((lir->prev != NULL) && (lir->next != NULL)) { + lir->prev->next = lir->next; + lir->next->prev = lir->prev; + } } /* Convert an instruction to a NOP */ void Mir2Lir::NopLIR(LIR* lir) { lir->flags.is_nop = true; + if (!cu_->verbose) { + UnlinkLIR(lir); + } } void Mir2Lir::SetMemRefType(LIR* lir, bool is_load, int mem_type) { uint64_t *mask_ptr; uint64_t mask = ENCODE_MEM; DCHECK(GetTargetInstFlags(lir->opcode) & (IS_LOAD | IS_STORE)); + DCHECK(!lir->flags.use_def_invalid); if (is_load) { - mask_ptr = &lir->use_mask; + mask_ptr = &lir->u.m.use_mask; } else { - mask_ptr = &lir->def_mask; + mask_ptr = &lir->u.m.def_mask; } /* Clear out the memref flags */ *mask_ptr &= ~mask; @@ -104,7 +129,7 @@ void Mir2Lir::AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, * Store the Dalvik register id in alias_info. Mark the MSB if it is a 64-bit * access. 
*/ - lir->alias_info = ENCODE_ALIAS_INFO(reg_id, is64bit); + lir->flags.alias_info = ENCODE_ALIAS_INFO(reg_id, is64bit); } /* @@ -135,10 +160,12 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) { break; case kPseudoDalvikByteCodeBoundary: if (lir->operands[0] == 0) { - lir->operands[0] = reinterpret_cast<uintptr_t>("No instruction string"); + // NOTE: only used for debug listings. + lir->operands[0] = WrapPointer(ArenaStrdup("No instruction string")); } LOG(INFO) << "-------- dalvik offset: 0x" << std::hex - << lir->dalvik_offset << " @ " << reinterpret_cast<char*>(lir->operands[0]); + << lir->dalvik_offset << " @ " + << reinterpret_cast<char*>(UnwrapPointer(lir->operands[0])); break; case kPseudoExitBlock: LOG(INFO) << "-------- exit offset: 0x" << std::hex << dest; @@ -190,11 +217,11 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) { break; } - if (lir->use_mask && (!lir->flags.is_nop || dump_nop)) { - DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->use_mask, "use")); + if (lir->u.m.use_mask && (!lir->flags.is_nop || dump_nop)) { + DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.use_mask, "use")); } - if (lir->def_mask && (!lir->flags.is_nop || dump_nop)) { - DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->def_mask, "def")); + if (lir->u.m.def_mask && (!lir->flags.is_nop || dump_nop)) { + DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.def_mask, "def")); } } @@ -225,12 +252,12 @@ void Mir2Lir::DumpPromotionMap() { } /* Dump a mapping table */ -void Mir2Lir::DumpMappingTable(const char* table_name, const std::string& descriptor, - const std::string& name, const std::string& signature, +void Mir2Lir::DumpMappingTable(const char* table_name, const char* descriptor, + const char* name, const Signature& signature, const std::vector<uint32_t>& v) { if (v.size() > 0) { std::string line(StringPrintf("\n %s %s%s_%s_table[%zu] = {", table_name, - descriptor.c_str(), name.c_str(), signature.c_str(), v.size())); + descriptor, name, signature.ToString().c_str(), v.size())); std::replace(line.begin(), line.end(), ';', '_'); LOG(INFO) << line; for (uint32_t i = 0; i < v.size(); i+=2) { @@ -270,9 +297,9 @@ void Mir2Lir::CodegenDump() { const DexFile::MethodId& method_id = cu_->dex_file->GetMethodId(cu_->method_idx); - std::string signature(cu_->dex_file->GetMethodSignature(method_id)); - std::string name(cu_->dex_file->GetMethodName(method_id)); - std::string descriptor(cu_->dex_file->GetMethodDeclaringClassDescriptor(method_id)); + const Signature signature = cu_->dex_file->GetMethodSignature(method_id); + const char* name = cu_->dex_file->GetMethodName(method_id); + const char* descriptor(cu_->dex_file->GetMethodDeclaringClassDescriptor(method_id)); // Dump mapping tables DumpMappingTable("PC2Dex_MappingTable", descriptor, name, signature, pc2dex_mapping_table_); @@ -325,6 +352,7 @@ LIR* Mir2Lir::AddWordData(LIR* *constant_list_p, int value) { new_value->operands[0] = value; new_value->next = *constant_list_p; *constant_list_p = new_value; + estimated_native_code_size_ += sizeof(value); return new_value; } return NULL; @@ -343,6 +371,17 @@ static void PushWord(std::vector<uint8_t>&buf, int data) { buf.push_back((data >> 24) & 0xff); } +// Push 8 bytes on 64-bit systems; 4 on 32-bit systems. 
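LIR operands are plain ints, so the reinterpret_casts of host pointers being removed throughout this hunk would truncate on a 64-bit host. The new WrapPointer/UnwrapPointer calls, together with the pointer_storage_ growable array and the "Reserve pointer id 0 for NULL" note added to the constructor further down, suggest a side table that hands out small indices in place of raw pointers. A minimal sketch assuming exactly that design; it is not the actual ART implementation:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Store host pointers in a side table and keep only a small index in the
    // 32-bit LIR operand.
    class PointerTable {
     public:
      PointerTable() { Wrap(nullptr); }  // reserve index 0 for null
      size_t Wrap(const void* p) {
        storage_.push_back(p);
        return storage_.size() - 1;
      }
      const void* Unwrap(size_t idx) const { return storage_[idx]; }

     private:
      std::vector<const void*> storage_;
    };

    int main() {
      PointerTable table;
      const char* msg = "No instruction string";
      size_t idx = table.Wrap(msg);
      assert(idx != 0);
      assert(table.Unwrap(idx) == msg);
      assert(table.Unwrap(0) == nullptr);
      return 0;
    }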
+static void PushPointer(std::vector<uint8_t>&buf, void const* pointer) { + uintptr_t data = reinterpret_cast<uintptr_t>(pointer); + if (sizeof(void*) == sizeof(uint64_t)) { + PushWord(buf, (data >> (sizeof(void*) * 4)) & 0xFFFFFFFF); + PushWord(buf, data & 0xFFFFFFFF); + } else { + PushWord(buf, data); + } +} + static void AlignBuffer(std::vector<uint8_t>&buf, size_t offset) { while (buf.size() < offset) { buf.push_back(0); @@ -369,9 +408,8 @@ void Mir2Lir::InstallLiteralPools() { static_cast<InvokeType>(data_lir->operands[1]), code_buffer_.size()); const DexFile::MethodId& id = cu_->dex_file->GetMethodId(target); - // unique based on target to ensure code deduplication works - uint32_t unique_patch_value = reinterpret_cast<uint32_t>(&id); - PushWord(code_buffer_, unique_patch_value); + // unique value based on target to ensure code deduplication works + PushPointer(code_buffer_, &id); data_lir = NEXT_LIR(data_lir); } data_lir = method_literal_list_; @@ -385,9 +423,8 @@ void Mir2Lir::InstallLiteralPools() { static_cast<InvokeType>(data_lir->operands[1]), code_buffer_.size()); const DexFile::MethodId& id = cu_->dex_file->GetMethodId(target); - // unique based on target to ensure code deduplication works - uint32_t unique_patch_value = reinterpret_cast<uint32_t>(&id); - PushWord(code_buffer_, unique_patch_value); + // unique value based on target to ensure code deduplication works + PushPointer(code_buffer_, &id); data_lir = NEXT_LIR(data_lir); } } @@ -408,6 +445,7 @@ void Mir2Lir::InstallSwitchTables() { int bx_offset = INVALID_OFFSET; switch (cu_->instruction_set) { case kThumb2: + DCHECK(tab_rec->anchor->flags.fixup != kFixupNone); bx_offset = tab_rec->anchor->offset + 4; break; case kX86: @@ -422,7 +460,7 @@ void Mir2Lir::InstallSwitchTables() { LOG(INFO) << "Switch table for offset 0x" << std::hex << bx_offset; } if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) { - const int* keys = reinterpret_cast<const int*>(&(tab_rec->table[2])); + const int32_t* keys = reinterpret_cast<const int32_t*>(&(tab_rec->table[2])); for (int elems = 0; elems < tab_rec->table[1]; elems++) { int disp = tab_rec->targets[elems]->offset - bx_offset; if (cu_->verbose) { @@ -463,7 +501,7 @@ void Mir2Lir::InstallFillArrayData() { } } -static int AssignLiteralOffsetCommon(LIR* lir, int offset) { +static int AssignLiteralOffsetCommon(LIR* lir, CodeOffset offset) { for (; lir != NULL; lir = lir->next) { lir->offset = offset; offset += 4; @@ -471,6 +509,17 @@ static int AssignLiteralOffsetCommon(LIR* lir, int offset) { return offset; } +static int AssignLiteralPointerOffsetCommon(LIR* lir, CodeOffset offset) { + unsigned int element_size = sizeof(void*); + // Align to natural pointer size. 
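The statement that follows rounds offset up to the next multiple of element_size with the usual power-of-two align-up idiom: add (alignment - 1), then clear the low bits. A quick standalone check of the idiom:

    #include <cassert>
    #include <cstddef>

    // Round offset up to the next multiple of align; align must be a power of two.
    size_t AlignUp(size_t offset, size_t align) {
      return (offset + (align - 1)) & ~(align - 1);
    }

    int main() {
      assert(AlignUp(6, 8) == 8);
      assert(AlignUp(8, 8) == 8);    // already-aligned values are unchanged
      assert(AlignUp(9, 4) == 12);
      return 0;
    }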
+ offset = (offset + (element_size - 1)) & ~(element_size - 1); + for (; lir != NULL; lir = lir->next) { + lir->offset = offset; + offset += element_size; + } + return offset; +} + // Make sure we have a code address for every declared catch entry bool Mir2Lir::VerifyCatchEntries() { bool success = true; @@ -580,8 +629,8 @@ class NativePcToReferenceMapBuilder { table_index = (table_index + 1) % entries_; } in_use_[table_index] = true; - SetNativeOffset(table_index, native_offset); - DCHECK_EQ(native_offset, GetNativeOffset(table_index)); + SetCodeOffset(table_index, native_offset); + DCHECK_EQ(native_offset, GetCodeOffset(table_index)); SetReferences(table_index, references); } @@ -590,7 +639,7 @@ class NativePcToReferenceMapBuilder { return NativePcOffsetToReferenceMap::Hash(native_offset) % entries_; } - uint32_t GetNativeOffset(size_t table_index) { + uint32_t GetCodeOffset(size_t table_index) { uint32_t native_offset = 0; size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t); for (size_t i = 0; i < native_offset_width_; i++) { @@ -599,7 +648,7 @@ class NativePcToReferenceMapBuilder { return native_offset; } - void SetNativeOffset(size_t table_index, uint32_t native_offset) { + void SetCodeOffset(size_t table_index, uint32_t native_offset) { size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t); for (size_t i = 0; i < native_offset_width_; i++) { (*table_)[table_offset + i] = (native_offset >> (i * 8)) & 0xFF; @@ -654,17 +703,17 @@ void Mir2Lir::CreateNativeGcMap() { } /* Determine the offset of each literal field */ -int Mir2Lir::AssignLiteralOffset(int offset) { +int Mir2Lir::AssignLiteralOffset(CodeOffset offset) { offset = AssignLiteralOffsetCommon(literal_list_, offset); - offset = AssignLiteralOffsetCommon(code_literal_list_, offset); - offset = AssignLiteralOffsetCommon(method_literal_list_, offset); + offset = AssignLiteralPointerOffsetCommon(code_literal_list_, offset); + offset = AssignLiteralPointerOffsetCommon(method_literal_list_, offset); return offset; } -int Mir2Lir::AssignSwitchTablesOffset(int offset) { +int Mir2Lir::AssignSwitchTablesOffset(CodeOffset offset) { GrowableArray<SwitchTable*>::Iterator iterator(&switch_tables_); while (true) { - Mir2Lir::SwitchTable *tab_rec = iterator.Next(); + Mir2Lir::SwitchTable* tab_rec = iterator.Next(); if (tab_rec == NULL) break; tab_rec->offset = offset; if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) { @@ -678,7 +727,7 @@ int Mir2Lir::AssignSwitchTablesOffset(int offset) { return offset; } -int Mir2Lir::AssignFillArrayDataOffset(int offset) { +int Mir2Lir::AssignFillArrayDataOffset(CodeOffset offset) { GrowableArray<FillArrayData*>::Iterator iterator(&fill_array_data_); while (true) { Mir2Lir::FillArrayData *tab_rec = iterator.Next(); @@ -691,122 +740,35 @@ int Mir2Lir::AssignFillArrayDataOffset(int offset) { return offset; } -// LIR offset assignment. -int Mir2Lir::AssignInsnOffsets() { - LIR* lir; - int offset = 0; - - for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { - lir->offset = offset; - if (lir->opcode >= 0) { - if (!lir->flags.is_nop) { - offset += lir->flags.size; - } - } else if (lir->opcode == kPseudoPseudoAlign4) { - if (offset & 0x2) { - offset += 2; - lir->operands[0] = 1; - } else { - lir->operands[0] = 0; - } - } - /* Pseudo opcodes don't consume space */ - } - - return offset; -} - -/* - * Walk the compilation unit and assign offsets to instructions - * and literals and compute the total size of the compiled unit. 
- */ -void Mir2Lir::AssignOffsets() { - int offset = AssignInsnOffsets(); - - /* Const values have to be word aligned */ - offset = (offset + 3) & ~3; - - /* Set up offsets for literals */ - data_offset_ = offset; - - offset = AssignLiteralOffset(offset); - - offset = AssignSwitchTablesOffset(offset); - - offset = AssignFillArrayDataOffset(offset); - - total_size_ = offset; -} - -/* - * Go over each instruction in the list and calculate the offset from the top - * before sending them off to the assembler. If out-of-range branch distance is - * seen rearrange the instructions a bit to correct it. - */ -void Mir2Lir::AssembleLIR() { - AssignOffsets(); - int assembler_retries = 0; - /* - * Assemble here. Note that we generate code with optimistic assumptions - * and if found now to work, we'll have to redo the sequence and retry. - */ - - while (true) { - AssemblerStatus res = AssembleInstructions(0); - if (res == kSuccess) { - break; - } else { - assembler_retries++; - if (assembler_retries > MAX_ASSEMBLER_RETRIES) { - CodegenDump(); - LOG(FATAL) << "Assembler error - too many retries"; - } - // Redo offsets and try again - AssignOffsets(); - code_buffer_.clear(); - } - } - - // Install literals - InstallLiteralPools(); - - // Install switch tables - InstallSwitchTables(); - - // Install fill array data - InstallFillArrayData(); - - // Create the mapping table and native offset to reference map. - CreateMappingTables(); - - CreateNativeGcMap(); -} - /* * Insert a kPseudoCaseLabel at the beginning of the Dalvik - * offset vaddr. This label will be used to fix up the case - * branch table during the assembly phase. Be sure to set - * all resource flags on this to prevent code motion across - * target boundaries. KeyVal is just there for debugging. + * offset vaddr if pretty-printing, otherise use the standard block + * label. The selected label will be used to fix up the case + * branch table during the assembly phase. All resource flags + * are set to prevent code motion. KeyVal is just there for debugging. */ -LIR* Mir2Lir::InsertCaseLabel(int vaddr, int keyVal) { - SafeMap<unsigned int, LIR*>::iterator it; - it = boundary_map_.find(vaddr); - if (it == boundary_map_.end()) { - LOG(FATAL) << "Error: didn't find vaddr 0x" << std::hex << vaddr; +LIR* Mir2Lir::InsertCaseLabel(DexOffset vaddr, int keyVal) { + LIR* boundary_lir = &block_label_list_[mir_graph_->FindBlock(vaddr)->id]; + LIR* res = boundary_lir; + if (cu_->verbose) { + // Only pay the expense if we're pretty-printing. 
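The AssignInsnOffsets/AssignOffsets/AssembleLIR trio deleted above implemented a classic assemble-and-retry driver: assign offsets optimistically, assemble, and if an encoding turns out not to reach (for example an out-of-range branch), rewrite it, reassign offsets, and try again up to MAX_ASSEMBLER_RETRIES. The FixupKind field added to the encoding map suggests this driver moves into the target-specific assemblers rather than disappearing, though that code is outside this hunk. A compact sketch of the retry pattern, with Assemble and Relayout as stand-in names:

    #include <cstdio>

    enum class Status { kSuccess, kRetry };

    // Stand-in: pretend the first attempt discovers an out-of-range branch.
    Status Assemble(int attempt) {
      return attempt == 0 ? Status::kRetry : Status::kSuccess;
    }

    // Stand-in: widen the offending branch and reassign instruction offsets.
    void Relayout() {}

    int main() {
      const int kMaxRetries = 10;
      int attempt = 0;
      while (Assemble(attempt) != Status::kSuccess) {
        if (++attempt > kMaxRetries) {
          std::fprintf(stderr, "too many assembler retries\n");
          return 1;
        }
        Relayout();
      }
      return 0;
    }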
+ LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR)); + new_label->dalvik_offset = vaddr; + new_label->opcode = kPseudoCaseLabel; + new_label->operands[0] = keyVal; + new_label->flags.fixup = kFixupLabel; + DCHECK(!new_label->flags.use_def_invalid); + new_label->u.m.def_mask = ENCODE_ALL; + InsertLIRAfter(boundary_lir, new_label); + res = new_label; } - LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR)); - new_label->dalvik_offset = vaddr; - new_label->opcode = kPseudoCaseLabel; - new_label->operands[0] = keyVal; - InsertLIRAfter(it->second, new_label); - return new_label; + return res; } -void Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable *tab_rec) { +void Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) { const uint16_t* table = tab_rec->table; - int base_vaddr = tab_rec->vaddr; - const int *targets = reinterpret_cast<const int*>(&table[4]); + DexOffset base_vaddr = tab_rec->vaddr; + const int32_t *targets = reinterpret_cast<const int32_t*>(&table[4]); int entries = table[1]; int low_key = s4FromSwitchData(&table[2]); for (int i = 0; i < entries; i++) { @@ -814,12 +776,12 @@ void Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable *tab_rec) { } } -void Mir2Lir::MarkSparseCaseLabels(Mir2Lir::SwitchTable *tab_rec) { +void Mir2Lir::MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec) { const uint16_t* table = tab_rec->table; - int base_vaddr = tab_rec->vaddr; + DexOffset base_vaddr = tab_rec->vaddr; int entries = table[1]; - const int* keys = reinterpret_cast<const int*>(&table[2]); - const int* targets = &keys[entries]; + const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]); + const int32_t* targets = &keys[entries]; for (int i = 0; i < entries; i++) { tab_rec->targets[i] = InsertCaseLabel(base_vaddr + targets[i], keys[i]); } @@ -852,8 +814,8 @@ void Mir2Lir::DumpSparseSwitchTable(const uint16_t* table) { */ uint16_t ident = table[0]; int entries = table[1]; - const int* keys = reinterpret_cast<const int*>(&table[2]); - const int* targets = &keys[entries]; + const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]); + const int32_t* targets = &keys[entries]; LOG(INFO) << "Sparse switch table - ident:0x" << std::hex << ident << ", entries: " << std::dec << entries; for (int i = 0; i < entries; i++) { @@ -872,7 +834,7 @@ void Mir2Lir::DumpPackedSwitchTable(const uint16_t* table) { * Total size is (4+size*2) 16-bit code units. */ uint16_t ident = table[0]; - const int* targets = reinterpret_cast<const int*>(&table[4]); + const int32_t* targets = reinterpret_cast<const int32_t*>(&table[4]); int entries = table[1]; int low_key = s4FromSwitchData(&table[2]); LOG(INFO) << "Packed switch table - ident:0x" << std::hex << ident @@ -883,18 +845,10 @@ void Mir2Lir::DumpPackedSwitchTable(const uint16_t* table) { } } -/* - * Set up special LIR to mark a Dalvik byte-code instruction start and - * record it in the boundary_map. NOTE: in cases such as kMirOpCheck in - * which we split a single Dalvik instruction, only the first MIR op - * associated with a Dalvik PC should be entered into the map. 
- */ -LIR* Mir2Lir::MarkBoundary(int offset, const char* inst_str) { - LIR* res = NewLIR1(kPseudoDalvikByteCodeBoundary, reinterpret_cast<uintptr_t>(inst_str)); - if (boundary_map_.find(offset) == boundary_map_.end()) { - boundary_map_.Put(offset, res); - } - return res; +/* Set up special LIR to mark a Dalvik byte-code instruction start for pretty printing */ +void Mir2Lir::MarkBoundary(DexOffset offset, const char* inst_str) { + // NOTE: only used for debug listings. + NewLIR1(kPseudoDalvikByteCodeBoundary, WrapPointer(ArenaStrdup(inst_str))); } bool Mir2Lir::EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) { @@ -942,6 +896,7 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena literal_list_(NULL), method_literal_list_(NULL), code_literal_list_(NULL), + first_fixup_(NULL), cu_(cu), mir_graph_(mir_graph), switch_tables_(arena, 4, kGrowableArraySwitchTables), @@ -949,10 +904,14 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena throw_launchpads_(arena, 2048, kGrowableArrayThrowLaunchPads), suspend_launchpads_(arena, 4, kGrowableArraySuspendLaunchPads), intrinsic_launchpads_(arena, 2048, kGrowableArrayMisc), + tempreg_info_(arena, 20, kGrowableArrayMisc), + reginfo_map_(arena, 64, kGrowableArrayMisc), + pointer_storage_(arena, 128, kGrowableArrayMisc), data_offset_(0), total_size_(0), block_label_list_(NULL), current_dalvik_offset_(0), + estimated_native_code_size_(0), reg_pool_(NULL), live_sreg_(0), num_core_spills_(0), @@ -965,9 +924,13 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena promotion_map_ = static_cast<PromotionMap*> (arena_->Alloc((cu_->num_dalvik_registers + cu_->num_compiler_temps + 1) * sizeof(promotion_map_[0]), ArenaAllocator::kAllocRegAlloc)); + // Reserve pointer id 0 for NULL. + size_t null_idx = WrapPointer(NULL); + DCHECK_EQ(null_idx, 0U); } void Mir2Lir::Materialize() { + cu_->NewTimingSplit("RegisterAllocation"); CompilerInitializeRegAlloc(); // Needs to happen after SSA naming /* Allocate Registers using simple local allocation scheme */ @@ -979,6 +942,7 @@ void Mir2Lir::Materialize() { * special codegen doesn't succeed, first_lir_insn_ will * set to NULL; */ + cu_->NewTimingSplit("SpecialMIR2LIR"); SpecialMIR2LIR(mir_graph_->GetSpecialCase()); } @@ -1091,5 +1055,4 @@ void Mir2Lir::InsertLIRAfter(LIR* current_lir, LIR* new_lir) { new_lir->next->prev = new_lir; } - } // namespace art diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index f018c61819..df6493dc77 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -30,16 +30,17 @@ namespace art { */ /* - * Generate an kPseudoBarrier marker to indicate the boundary of special + * Generate a kPseudoBarrier marker to indicate the boundary of special * blocks. */ void Mir2Lir::GenBarrier() { LIR* barrier = NewLIR0(kPseudoBarrier); /* Mark all resources as being clobbered */ - barrier->def_mask = -1; + DCHECK(!barrier->flags.use_def_invalid); + barrier->u.m.def_mask = ENCODE_ALL; } -// FIXME: need to do some work to split out targets with +// TODO: need to do some work to split out targets with // condition codes and those without LIR* Mir2Lir::GenCheck(ConditionCode c_code, ThrowKind kind) { DCHECK_NE(cu_->instruction_set, kMips); @@ -65,8 +66,7 @@ LIR* Mir2Lir::GenImmedCheck(ConditionCode c_code, int reg, int imm_val, ThrowKin /* Perform null-check on a register. 
*/ LIR* Mir2Lir::GenNullCheck(int s_reg, int m_reg, int opt_flags) { - if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && - opt_flags & MIR_IGNORE_NULL_CHECK) { + if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) { return NULL; } return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer); @@ -127,13 +127,11 @@ void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src2))) { // OK - convert this to a compare immediate and branch OpCmpImmBranch(cond, rl_src1.low_reg, mir_graph_->ConstantValue(rl_src2), taken); - OpUnconditionalBranch(fall_through); return; } } rl_src2 = LoadValue(rl_src2, kCoreReg); OpCmpBranch(cond, rl_src1.low_reg, rl_src2.low_reg, taken); - OpUnconditionalBranch(fall_through); } void Mir2Lir::GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken, @@ -164,7 +162,6 @@ void Mir2Lir::GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_s LOG(FATAL) << "Unexpected opcode " << opcode; } OpCmpImmBranch(cond, rl_src.low_reg, 0, taken); - OpUnconditionalBranch(fall_through); } void Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) { @@ -337,8 +334,8 @@ void Mir2Lir::GenSput(uint32_t field_idx, RegLocation rl_src, bool is_long_or_do bool is_volatile; bool is_referrers_class; bool fast_path = cu_->compiler_driver->ComputeStaticFieldInfo( - field_idx, mir_graph_->GetCurrentDexCompilationUnit(), field_offset, ssb_index, - is_referrers_class, is_volatile, true); + field_idx, mir_graph_->GetCurrentDexCompilationUnit(), true, + &field_offset, &ssb_index, &is_referrers_class, &is_volatile); if (fast_path && !SLOW_FIELD_PATH) { DCHECK_GE(field_offset, 0); int rBase; @@ -423,8 +420,8 @@ void Mir2Lir::GenSget(uint32_t field_idx, RegLocation rl_dest, bool is_volatile; bool is_referrers_class; bool fast_path = cu_->compiler_driver->ComputeStaticFieldInfo( - field_idx, mir_graph_->GetCurrentDexCompilationUnit(), field_offset, ssb_index, - is_referrers_class, is_volatile, false); + field_idx, mir_graph_->GetCurrentDexCompilationUnit(), false, + &field_offset, &ssb_index, &is_referrers_class, &is_volatile); if (fast_path && !SLOW_FIELD_PATH) { DCHECK_GE(field_offset, 0); int rBase; @@ -506,7 +503,7 @@ void Mir2Lir::HandleSuspendLaunchPads() { ResetRegPool(); ResetDefTracking(); LIR* lab = suspend_launchpads_.Get(i); - LIR* resume_lab = reinterpret_cast<LIR*>(lab->operands[0]); + LIR* resume_lab = reinterpret_cast<LIR*>(UnwrapPointer(lab->operands[0])); current_dalvik_offset_ = lab->operands[1]; AppendLIR(lab); int r_tgt = CallHelperSetup(helper_offset); @@ -521,12 +518,12 @@ void Mir2Lir::HandleIntrinsicLaunchPads() { ResetRegPool(); ResetDefTracking(); LIR* lab = intrinsic_launchpads_.Get(i); - CallInfo* info = reinterpret_cast<CallInfo*>(lab->operands[0]); + CallInfo* info = reinterpret_cast<CallInfo*>(UnwrapPointer(lab->operands[0])); current_dalvik_offset_ = info->offset; AppendLIR(lab); // NOTE: GenInvoke handles MarkSafepointPC GenInvoke(info); - LIR* resume_lab = reinterpret_cast<LIR*>(lab->operands[2]); + LIR* resume_lab = reinterpret_cast<LIR*>(UnwrapPointer(lab->operands[2])); if (resume_lab != NULL) { OpUnconditionalBranch(resume_lab); } @@ -626,7 +623,7 @@ void Mir2Lir::GenIGet(uint32_t field_idx, int opt_flags, OpSize size, int field_offset; bool is_volatile; - bool fast_path = FastInstance(field_idx, field_offset, is_volatile, false); + bool fast_path = FastInstance(field_idx, false, &field_offset, 
&is_volatile); if (fast_path && !SLOW_FIELD_PATH) { RegLocation rl_result; @@ -687,8 +684,7 @@ void Mir2Lir::GenIPut(uint32_t field_idx, int opt_flags, OpSize size, int field_offset; bool is_volatile; - bool fast_path = FastInstance(field_idx, field_offset, is_volatile, - true); + bool fast_path = FastInstance(field_idx, true, &field_offset, &is_volatile); if (fast_path && !SLOW_FIELD_PATH) { RegisterClass reg_class = oat_reg_class_by_size(size); DCHECK_GE(field_offset, 0); @@ -730,6 +726,18 @@ void Mir2Lir::GenIPut(uint32_t field_idx, int opt_flags, OpSize size, } } +void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index, + RegLocation rl_src) { + bool needs_range_check = !(opt_flags & MIR_IGNORE_RANGE_CHECK); + bool needs_null_check = !((cu_->disable_opt & (1 << kNullCheckElimination)) && + (opt_flags & MIR_IGNORE_NULL_CHECK)); + ThreadOffset helper = needs_range_check + ? (needs_null_check ? QUICK_ENTRYPOINT_OFFSET(pAputObjectWithNullAndBoundCheck) + : QUICK_ENTRYPOINT_OFFSET(pAputObjectWithBoundCheck)) + : QUICK_ENTRYPOINT_OFFSET(pAputObject); + CallRuntimeHelperRegLocationRegLocationRegLocation(helper, rl_array, rl_index, rl_src, true); +} + void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { RegLocation rl_method = LoadCurrMethod(); int res_reg = AllocTemp(); @@ -1113,8 +1121,8 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ if (!type_known_abstract) { branch2 = OpCmpBranch(kCondEq, TargetReg(kArg1), class_reg, NULL); } - CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), TargetReg(kArg1), - TargetReg(kArg2), true); + CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), TargetReg(kArg2), + TargetReg(kArg1), true); /* branch target here */ LIR* target = NewLIR0(kPseudoTargetLabel); branch1->target = target; @@ -1299,6 +1307,7 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, } StoreValue(rl_dest, rl_result); } else { + bool done = false; // Set to true if we happen to find a way to use a real instruction. if (cu_->instruction_set == kMips) { rl_src1 = LoadValue(rl_src1, kCoreReg); rl_src2 = LoadValue(rl_src2, kCoreReg); @@ -1306,7 +1315,23 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, GenImmedCheck(kCondEq, rl_src2.low_reg, 0, kThrowDivZero); } rl_result = GenDivRem(rl_dest, rl_src1.low_reg, rl_src2.low_reg, op == kOpDiv); - } else { + done = true; + } else if (cu_->instruction_set == kThumb2) { + if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) { + // Use ARM SDIV instruction for division. For remainder we also need to + // calculate using a MUL and subtract. + rl_src1 = LoadValue(rl_src1, kCoreReg); + rl_src2 = LoadValue(rl_src2, kCoreReg); + if (check_zero) { + GenImmedCheck(kCondEq, rl_src2.low_reg, 0, kThrowDivZero); + } + rl_result = GenDivRem(rl_dest, rl_src1.low_reg, rl_src2.low_reg, op == kOpDiv); + done = true; + } + } + + // If we haven't already generated the code use the callout function. + if (!done) { ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pIdivmod); FlushAllRegs(); /* Send everything to home location */ LoadValueDirectFixed(rl_src2, TargetReg(kArg1)); @@ -1315,7 +1340,7 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, if (check_zero) { GenImmedCheck(kCondEq, TargetReg(kArg1), 0, kThrowDivZero); } - // NOTE: callout here is not a safepoint + // NOTE: callout here is not a safepoint. 
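Both the new SDIV fast path and the pIdivmod callout keep an explicit divide-by-zero check (GenImmedCheck ... kThrowDivZero) because Java requires an ArithmeticException there, whereas ARM's SDIV, with trapping disabled, quietly yields 0. A host-side sketch of the required semantics; the exception type is a stand-in for the throw path, and the -1 special case exists only to dodge C++'s INT_MIN / -1 undefined behaviour (Java, like SDIV, simply yields INT_MIN for that input):

    #include <cassert>
    #include <cstdint>
    #include <stdexcept>

    int32_t JavaDiv(int32_t a, int32_t b) {
      if (b == 0) {
        throw std::runtime_error("divide by zero");  // stand-in for kThrowDivZero
      }
      if (b == -1) {
        // Negate via unsigned math: well-defined in C++, and INT32_MIN maps to
        // itself, matching Java semantics.
        return static_cast<int32_t>(0u - static_cast<uint32_t>(a));
      }
      return a / b;  // plain truncating division for the remaining cases
    }

    int main() {
      assert(JavaDiv(7, -2) == -3);
      assert(JavaDiv(INT32_MIN, -1) == INT32_MIN);
      bool threw = false;
      try { JavaDiv(1, 0); } catch (const std::runtime_error&) { threw = true; }
      assert(threw);
      return 0;
    }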
CallHelper(r_tgt, func_offset, false /* not a safepoint */); if (op == kOpDiv) rl_result = GetReturn(false); @@ -1343,7 +1368,7 @@ static bool IsPopCountLE2(unsigned int x) { } // Returns the index of the lowest set bit in 'x'. -static int LowestSetBit(unsigned int x) { +static int32_t LowestSetBit(uint32_t x) { int bit_posn = 0; while ((x & 0xf) == 0) { bit_posn += 4; @@ -1553,11 +1578,24 @@ void Mir2Lir::GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, Re if (HandleEasyDivRem(opcode, is_div, rl_src, rl_dest, lit)) { return; } + + bool done = false; if (cu_->instruction_set == kMips) { rl_src = LoadValue(rl_src, kCoreReg); rl_result = GenDivRemLit(rl_dest, rl_src.low_reg, lit, is_div); - } else { - FlushAllRegs(); /* Everything to home location */ + done = true; + } else if (cu_->instruction_set == kThumb2) { + if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) { + // Use ARM SDIV instruction for division. For remainder we also need to + // calculate using a MUL and subtract. + rl_src = LoadValue(rl_src, kCoreReg); + rl_result = GenDivRemLit(rl_dest, rl_src.low_reg, lit, is_div); + done = true; + } + } + + if (!done) { + FlushAllRegs(); /* Everything to home location. */ LoadValueDirectFixed(rl_src, TargetReg(kArg0)); Clobber(TargetReg(kArg0)); ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pIdivmod); @@ -1575,7 +1613,7 @@ void Mir2Lir::GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, Re } rl_src = LoadValue(rl_src, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); - // Avoid shifts by literal 0 - no support in Thumb. Change to copy + // Avoid shifts by literal 0 - no support in Thumb. Change to copy. if (shift_op && (lit == 0)) { OpRegCopy(rl_result.low_reg, rl_src.low_reg); } else { @@ -1651,7 +1689,7 @@ void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, case Instruction::REM_LONG_2ADDR: call_out = true; check_zero = true; - func_offset = QUICK_ENTRYPOINT_OFFSET(pLdivmod); + func_offset = QUICK_ENTRYPOINT_OFFSET(pLmod); /* NOTE - for Arm, result is in kArg2/kArg3 instead of kRet0/kRet1 */ ret_reg = (cu_->instruction_set == kThumb2) ? TargetReg(kArg2) : TargetReg(kRet0); break; @@ -1744,8 +1782,8 @@ void Mir2Lir::GenSuspendTest(int opt_flags) { FlushAllRegs(); LIR* branch = OpTestSuspend(NULL); LIR* ret_lab = NewLIR0(kPseudoTargetLabel); - LIR* target = RawLIR(current_dalvik_offset_, kPseudoSuspendTarget, - reinterpret_cast<uintptr_t>(ret_lab), current_dalvik_offset_); + LIR* target = RawLIR(current_dalvik_offset_, kPseudoSuspendTarget, WrapPointer(ret_lab), + current_dalvik_offset_); branch->target = target; suspend_launchpads_.Insert(target); } @@ -1758,11 +1796,23 @@ void Mir2Lir::GenSuspendTestAndBranch(int opt_flags, LIR* target) { } OpTestSuspend(target); LIR* launch_pad = - RawLIR(current_dalvik_offset_, kPseudoSuspendTarget, - reinterpret_cast<uintptr_t>(target), current_dalvik_offset_); + RawLIR(current_dalvik_offset_, kPseudoSuspendTarget, WrapPointer(target), + current_dalvik_offset_); FlushAllRegs(); OpUnconditionalBranch(launch_pad); suspend_launchpads_.Insert(launch_pad); } +/* Call out to helper assembly routine that will null check obj and then lock it. */ +void Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { + FlushAllRegs(); + CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(pLockObject), rl_src, true); +} + +/* Call out to helper assembly routine that will null check obj and then unlock it. 
*/ +void Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { + FlushAllRegs(); + CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(pUnlockObject), rl_src, true); +} + } // namespace art diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 2a0a23c7cd..7225262647 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -214,6 +214,7 @@ void Mir2Lir::CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset helper_off int arg0, RegLocation arg1, RegLocation arg2, bool safepoint_pc) { int r_tgt = CallHelperSetup(helper_offset); + DCHECK_EQ(arg1.wide, 0U); LoadValueDirectFixed(arg1, TargetReg(kArg1)); if (arg2.wide == 0) { LoadValueDirectFixed(arg2, TargetReg(kArg2)); @@ -225,6 +226,21 @@ void Mir2Lir::CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset helper_off CallHelper(r_tgt, helper_offset, safepoint_pc); } +void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset helper_offset, + RegLocation arg0, RegLocation arg1, + RegLocation arg2, + bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + DCHECK_EQ(arg0.wide, 0U); + LoadValueDirectFixed(arg0, TargetReg(kArg0)); + DCHECK_EQ(arg1.wide, 0U); + LoadValueDirectFixed(arg1, TargetReg(kArg1)); + DCHECK_EQ(arg1.wide, 0U); + LoadValueDirectFixed(arg2, TargetReg(kArg2)); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + /* * If there are any ins passed in registers that have not been promoted * to a callee-save register, flush them to the frame. Perform intial @@ -334,16 +350,13 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, uintptr_t direct_code, uintptr_t direct_method, InvokeType type) { Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); - if (cu->instruction_set != kThumb2) { - // Disable sharpening - direct_code = 0; - direct_method = 0; - } if (direct_code != 0 && direct_method != 0) { switch (state) { case 0: // Get the current Method* [sets kArg0] if (direct_code != static_cast<unsigned int>(-1)) { - cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code); + if (cu->instruction_set != kX86) { + cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code); + } } else { CHECK_EQ(cu->dex_file, target_method.dex_file); LIR* data_target = cg->ScanLiteralPool(cg->code_literal_list_, @@ -389,6 +402,7 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code); } else { CHECK_EQ(cu->dex_file, target_method.dex_file); + CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds()); LIR* data_target = cg->ScanLiteralPool(cg->code_literal_list_, target_method.dex_method_index, 0); if (data_target == NULL) { @@ -477,73 +491,56 @@ static int NextVCallInsn(CompilationUnit* cu, CallInfo* info, } /* - * All invoke-interface calls bounce off of art_quick_invoke_interface_trampoline, - * which will locate the target and continue on via a tail call. + * Emit the next instruction in an invoke interface sequence. This will do a lookup in the + * class's IMT, calling either the actual method or art_quick_imt_conflict_trampoline if + * more than one interface method map to the same index. Note also that we'll load the first + * argument ("this") into kArg1 here rather than the standard LoadArgRegs. 
*/ static int NextInterfaceCallInsn(CompilationUnit* cu, CallInfo* info, int state, const MethodReference& target_method, - uint32_t unused, uintptr_t unused2, - uintptr_t direct_method, InvokeType unused4) { + uint32_t method_idx, uintptr_t unused, + uintptr_t direct_method, InvokeType unused2) { Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); - if (cu->instruction_set != kThumb2) { - // Disable sharpening - direct_method = 0; - } - ThreadOffset trampoline = QUICK_ENTRYPOINT_OFFSET(pInvokeInterfaceTrampoline); - if (direct_method != 0) { - switch (state) { - case 0: // Load the trampoline target [sets kInvokeTgt]. - if (cu->instruction_set != kX86) { - cg->LoadWordDisp(cg->TargetReg(kSelf), trampoline.Int32Value(), - cg->TargetReg(kInvokeTgt)); - } - // Get the interface Method* [sets kArg0] - if (direct_method != static_cast<unsigned int>(-1)) { - cg->LoadConstant(cg->TargetReg(kArg0), direct_method); - } else { - CHECK_EQ(cu->dex_file, target_method.dex_file); - LIR* data_target = cg->ScanLiteralPool(cg->method_literal_list_, - target_method.dex_method_index, 0); - if (data_target == NULL) { - data_target = cg->AddWordData(&cg->method_literal_list_, - target_method.dex_method_index); - data_target->operands[1] = kInterface; - } - LIR* load_pc_rel = cg->OpPcRelLoad(cg->TargetReg(kArg0), data_target); - cg->AppendLIR(load_pc_rel); - DCHECK_EQ(cu->instruction_set, kThumb2) << reinterpret_cast<void*>(data_target); - } - break; - default: - return -1; + switch (state) { + case 0: // Set target method index in case of conflict [set kHiddenArg, kHiddenFpArg (x86)] + CHECK_EQ(cu->dex_file, target_method.dex_file); + CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds()); + cg->LoadConstant(cg->TargetReg(kHiddenArg), target_method.dex_method_index); + if (cu->instruction_set == kX86) { + cg->OpRegCopy(cg->TargetReg(kHiddenFpArg), cg->TargetReg(kHiddenArg)); + } + break; + case 1: { // Get "this" [set kArg1] + RegLocation rl_arg = info->args[0]; + cg->LoadValueDirectFixed(rl_arg, cg->TargetReg(kArg1)); + break; } - } else { - switch (state) { - case 0: - // Get the current Method* [sets kArg0] - TUNING: remove copy of method if it is promoted. - cg->LoadCurrMethodDirect(cg->TargetReg(kArg0)); - // Load the trampoline target [sets kInvokeTgt]. - if (cu->instruction_set != kX86) { - cg->LoadWordDisp(cg->TargetReg(kSelf), trampoline.Int32Value(), - cg->TargetReg(kInvokeTgt)); - } - break; - case 1: // Get method->dex_cache_resolved_methods_ [set/use kArg0] - cg->LoadWordDisp(cg->TargetReg(kArg0), - mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - cg->TargetReg(kArg0)); + case 2: // Is "this" null? 
[use kArg1] + cg->GenNullCheck(info->args[0].s_reg_low, cg->TargetReg(kArg1), info->opt_flags); + // Get this->klass_ [use kArg1, set kInvokeTgt] + cg->LoadWordDisp(cg->TargetReg(kArg1), mirror::Object::ClassOffset().Int32Value(), + cg->TargetReg(kInvokeTgt)); break; - case 2: // Grab target method* [set/use kArg0] - CHECK_EQ(cu->dex_file, target_method.dex_file); - cg->LoadWordDisp(cg->TargetReg(kArg0), - mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() + - (target_method.dex_method_index * 4), + case 3: // Get this->klass_->imtable [use kInvokeTgt, set kInvokeTgt] + cg->LoadWordDisp(cg->TargetReg(kInvokeTgt), mirror::Class::ImTableOffset().Int32Value(), + cg->TargetReg(kInvokeTgt)); + break; + case 4: // Get target method [use kInvokeTgt, set kArg0] + cg->LoadWordDisp(cg->TargetReg(kInvokeTgt), ((method_idx % ClassLinker::kImtSize) * 4) + + mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(), cg->TargetReg(kArg0)); break; + case 5: // Get the compiled code address [use kArg0, set kInvokeTgt] + if (cu->instruction_set != kX86) { + cg->LoadWordDisp(cg->TargetReg(kArg0), + mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(), + cg->TargetReg(kInvokeTgt)); + break; + } + // Intentional fallthrough for X86 default: return -1; - } } return state + 1; } @@ -810,7 +807,7 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset); LIR* ld = OpVldm(TargetReg(kArg3), regs_left); // TUNING: loosen barrier - ld->def_mask = ENCODE_ALL; + ld->u.m.def_mask = ENCODE_ALL; SetMemRefType(ld, true /* is_load */, kDalvikReg); call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); @@ -819,7 +816,7 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, direct_code, direct_method, type); LIR* st = OpVstm(TargetReg(kArg3), regs_left); SetMemRefType(st, false /* is_load */, kDalvikReg); - st->def_mask = ENCODE_ALL; + st->u.m.def_mask = ENCODE_ALL; call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); } @@ -892,7 +889,7 @@ bool Mir2Lir::GenInlinedCharAt(CallInfo* info) { LoadWordDisp(rl_obj.low_reg, value_offset, reg_ptr); if (range_check) { // Set up a launch pad to allow retry in case of bounds violation */ - launch_pad = RawLIR(0, kPseudoIntrinsicRetry, reinterpret_cast<uintptr_t>(info)); + launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info)); intrinsic_launchpads_.Insert(launch_pad); OpRegReg(kOpCmp, rl_idx.low_reg, reg_max); FreeTemp(reg_max); @@ -903,7 +900,7 @@ bool Mir2Lir::GenInlinedCharAt(CallInfo* info) { reg_max = AllocTemp(); LoadWordDisp(rl_obj.low_reg, count_offset, reg_max); // Set up a launch pad to allow retry in case of bounds violation */ - launch_pad = RawLIR(0, kPseudoIntrinsicRetry, reinterpret_cast<uintptr_t>(info)); + launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info)); intrinsic_launchpads_.Insert(launch_pad); OpRegReg(kOpCmp, rl_idx.low_reg, reg_max); FreeTemp(reg_max); @@ -961,6 +958,31 @@ bool Mir2Lir::GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty) { return true; } +bool Mir2Lir::GenInlinedReverseBytes(CallInfo* info, OpSize size) { + if (cu_->instruction_set == kMips) { + // TODO - add Mips implementation + return false; + } + RegLocation rl_src_i = info->args[0]; + RegLocation rl_dest = InlineTarget(info); // result reg + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (size == kLong) 
{ + RegLocation rl_i = LoadValueWide(rl_src_i, kCoreReg); + int reg_tmp = AllocTemp(); + OpRegCopy(reg_tmp, rl_result.low_reg); + OpRegReg(kOpRev, rl_result.low_reg, rl_i.high_reg); + OpRegReg(kOpRev, rl_result.high_reg, reg_tmp); + StoreValueWide(rl_dest, rl_result); + } else { + DCHECK(size == kWord || size == kSignedHalf); + OpKind op = (size == kWord) ? kOpRev : kOpRevsh; + RegLocation rl_i = LoadValue(rl_src_i, kCoreReg); + OpRegReg(op, rl_result.low_reg, rl_i.low_reg); + StoreValue(rl_dest, rl_result); + } + return true; +} + bool Mir2Lir::GenInlinedAbsInt(CallInfo* info) { if (cu_->instruction_set == kMips) { // TODO - add Mips implementation @@ -1069,7 +1091,7 @@ bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { } int r_tgt = (cu_->instruction_set != kX86) ? LoadHelper(QUICK_ENTRYPOINT_OFFSET(pIndexOf)) : 0; GenNullCheck(rl_obj.s_reg_low, reg_ptr, info->opt_flags); - LIR* launch_pad = RawLIR(0, kPseudoIntrinsicRetry, reinterpret_cast<uintptr_t>(info)); + LIR* launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info)); intrinsic_launchpads_.Insert(launch_pad); OpCmpImmBranch(kCondGt, reg_char, 0xFFFF, launch_pad); // NOTE: not a safepoint @@ -1079,7 +1101,7 @@ bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(pIndexOf)); } LIR* resume_tgt = NewLIR0(kPseudoTargetLabel); - launch_pad->operands[2] = reinterpret_cast<uintptr_t>(resume_tgt); + launch_pad->operands[2] = WrapPointer(resume_tgt); // Record that we've already inlined & null checked info->opt_flags |= (MIR_INLINED | MIR_IGNORE_NULL_CHECK); RegLocation rl_return = GetReturn(false); @@ -1107,7 +1129,7 @@ bool Mir2Lir::GenInlinedStringCompareTo(CallInfo* info) { LoadHelper(QUICK_ENTRYPOINT_OFFSET(pStringCompareTo)) : 0; GenNullCheck(rl_this.s_reg_low, reg_this, info->opt_flags); // TUNING: check if rl_cmp.s_reg_low is already null checked - LIR* launch_pad = RawLIR(0, kPseudoIntrinsicRetry, reinterpret_cast<uintptr_t>(info)); + LIR* launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info)); intrinsic_launchpads_.Insert(launch_pad); OpCmpImmBranch(kCondEq, reg_cmp, 0, launch_pad); // NOTE: not a safepoint @@ -1219,8 +1241,10 @@ bool Mir2Lir::GenIntrinsic(CallInfo* info) { * method. By doing this during basic block construction, we can also * take advantage of/generate new useful dataflow info. 
*/ + const DexFile::MethodId& target_mid = cu_->dex_file->GetMethodId(info->index); + const DexFile::TypeId& declaring_type = cu_->dex_file->GetTypeId(target_mid.class_idx_); StringPiece tgt_methods_declaring_class( - cu_->dex_file->GetMethodDeclaringClassDescriptor(cu_->dex_file->GetMethodId(info->index))); + cu_->dex_file->StringDataByIdx(declaring_type.descriptor_idx_)); if (tgt_methods_declaring_class.starts_with("Ljava/lang/Double;")) { std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); if (tgt_method == "long java.lang.Double.doubleToRawLongBits(double)") { @@ -1231,12 +1255,22 @@ bool Mir2Lir::GenIntrinsic(CallInfo* info) { } } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Float;")) { std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); - if (tgt_method == "int java.lang.Float.float_to_raw_int_bits(float)") { + if (tgt_method == "int java.lang.Float.floatToRawIntBits(float)") { return GenInlinedFloatCvt(info); } if (tgt_method == "float java.lang.Float.intBitsToFloat(int)") { return GenInlinedFloatCvt(info); } + } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Integer;")) { + std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); + if (tgt_method == "int java.lang.Integer.reverseBytes(int)") { + return GenInlinedReverseBytes(info, kWord); + } + } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Long;")) { + std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); + if (tgt_method == "long java.lang.Long.reverseBytes(long)") { + return GenInlinedReverseBytes(info, kLong); + } } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Math;") || tgt_methods_declaring_class.starts_with("Ljava/lang/StrictMath;")) { std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); @@ -1260,6 +1294,11 @@ bool Mir2Lir::GenIntrinsic(CallInfo* info) { tgt_method == "double java.lang.StrictMath.sqrt(double)") { return GenInlinedSqrt(info); } + } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Short;")) { + std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); + if (tgt_method == "short java.lang.Short.reverseBytes(short)") { + return GenInlinedReverseBytes(info, kSignedHalf); + } } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/String;")) { std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); if (tgt_method == "char java.lang.String.charAt(int)") { @@ -1285,6 +1324,32 @@ bool Mir2Lir::GenIntrinsic(CallInfo* info) { if (tgt_method == "java.lang.Thread java.lang.Thread.currentThread()") { return GenInlinedCurrentThread(info); } + } else if (tgt_methods_declaring_class.starts_with("Llibcore/io/Memory;")) { + std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); + if (tgt_method == "byte libcore.io.Memory.peekByte(long)") { + return GenInlinedPeek(info, kSignedByte); + } + if (tgt_method == "int libcore.io.Memory.peekIntNative(long)") { + return GenInlinedPeek(info, kWord); + } + if (tgt_method == "long libcore.io.Memory.peekLongNative(long)") { + return GenInlinedPeek(info, kLong); + } + if (tgt_method == "short libcore.io.Memory.peekShortNative(long)") { + return GenInlinedPeek(info, kSignedHalf); + } + if (tgt_method == "void libcore.io.Memory.pokeByte(long, byte)") { + return GenInlinedPoke(info, kSignedByte); + } + if (tgt_method == "void libcore.io.Memory.pokeIntNative(long, int)") { + return GenInlinedPoke(info, kWord); + } + if (tgt_method == "void libcore.io.Memory.pokeLongNative(long, long)") { + return GenInlinedPoke(info, 
kLong); + } + if (tgt_method == "void libcore.io.Memory.pokeShortNative(long, short)") { + return GenInlinedPoke(info, kSignedHalf); + } } else if (tgt_methods_declaring_class.starts_with("Lsun/misc/Unsafe;")) { std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); if (tgt_method == "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") { @@ -1373,16 +1438,13 @@ void Mir2Lir::GenInvoke(CallInfo* info) { bool fast_path = cu_->compiler_driver->ComputeInvokeInfo(mir_graph_->GetCurrentDexCompilationUnit(), current_dalvik_offset_, - info->type, target_method, - vtable_idx, - direct_code, direct_method, - true) && !SLOW_INVOKE_PATH; + true, true, + &info->type, &target_method, + &vtable_idx, + &direct_code, &direct_method) && !SLOW_INVOKE_PATH; if (info->type == kInterface) { - if (fast_path) { - p_null_ck = &null_ck; - } next_call_insn = fast_path ? NextInterfaceCallInsn : NextInterfaceCallInsnWithAccessCheck; - skip_this = false; + skip_this = fast_path; } else if (info->type == kDirect) { if (fast_path) { p_null_ck = &null_ck; @@ -1422,15 +1484,14 @@ void Mir2Lir::GenInvoke(CallInfo* info) { if (cu_->instruction_set != kX86) { call_inst = OpReg(kOpBlx, TargetReg(kInvokeTgt)); } else { - if (fast_path && info->type != kInterface) { + if (fast_path) { call_inst = OpMem(kOpBlx, TargetReg(kArg0), mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value()); } else { ThreadOffset trampoline(-1); switch (info->type) { case kInterface: - trampoline = fast_path ? QUICK_ENTRYPOINT_OFFSET(pInvokeInterfaceTrampoline) - : QUICK_ENTRYPOINT_OFFSET(pInvokeInterfaceTrampolineWithAccessCheck); + trampoline = QUICK_ENTRYPOINT_OFFSET(pInvokeInterfaceTrampolineWithAccessCheck); break; case kDirect: trampoline = QUICK_ENTRYPOINT_OFFSET(pInvokeDirectTrampolineWithAccessCheck); diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc index 630e990733..0f29578c4e 100644 --- a/compiler/dex/quick/local_optimizations.cc +++ b/compiler/dex/quick/local_optimizations.cc @@ -21,8 +21,8 @@ namespace art { #define DEBUG_OPT(X) /* Check RAW, WAR, and RAW dependency on the register operands */ -#define CHECK_REG_DEP(use, def, check) ((def & check->use_mask) || \ - ((use | def) & check->def_mask)) +#define CHECK_REG_DEP(use, def, check) ((def & check->u.m.use_mask) || \ + ((use | def) & check->u.m.def_mask)) /* Scheduler heuristics */ #define MAX_HOIST_DISTANCE 20 @@ -30,10 +30,10 @@ namespace art { #define LD_LATENCY 2 static bool IsDalvikRegisterClobbered(LIR* lir1, LIR* lir2) { - int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->alias_info); - int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->alias_info); - int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->alias_info); - int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->alias_info); + int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->flags.alias_info); + int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->flags.alias_info); + int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->flags.alias_info); + int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->flags.alias_info); return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo); } @@ -78,7 +78,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { } for (this_lir = PREV_LIR(tail_lir); this_lir != head_lir; this_lir = PREV_LIR(this_lir)) { - if (is_pseudo_opcode(this_lir->opcode)) { + if (IsPseudoLirOp(this_lir->opcode)) { continue; } @@ -99,15 +99,14 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { int native_reg_id; if 
(cu_->instruction_set == kX86) { // If x86, location differs depending on whether memory/reg operation. - native_reg_id = (GetTargetInstFlags(this_lir->opcode) & IS_STORE) ? this_lir->operands[2] - : this_lir->operands[0]; + native_reg_id = (target_flags & IS_STORE) ? this_lir->operands[2] : this_lir->operands[0]; } else { native_reg_id = this_lir->operands[0]; } - bool is_this_lir_load = GetTargetInstFlags(this_lir->opcode) & IS_LOAD; + bool is_this_lir_load = target_flags & IS_LOAD; LIR* check_lir; /* Use the mem mask to determine the rough memory location */ - uint64_t this_mem_mask = (this_lir->use_mask | this_lir->def_mask) & ENCODE_MEM; + uint64_t this_mem_mask = (this_lir->u.m.use_mask | this_lir->u.m.def_mask) & ENCODE_MEM; /* * Currently only eliminate redundant ld/st for constant and Dalvik @@ -117,10 +116,10 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { continue; } - uint64_t stop_def_reg_mask = this_lir->def_mask & ~ENCODE_MEM; + uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM; uint64_t stop_use_reg_mask; if (cu_->instruction_set == kX86) { - stop_use_reg_mask = (IS_BRANCH | this_lir->use_mask) & ~ENCODE_MEM; + stop_use_reg_mask = (IS_BRANCH | this_lir->u.m.use_mask) & ~ENCODE_MEM; } else { /* * Add pc to the resource mask to prevent this instruction @@ -128,7 +127,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { * region bits since stop_mask is used to check data/control * dependencies. */ - stop_use_reg_mask = (GetPCUseDefEncoding() | this_lir->use_mask) & ~ENCODE_MEM; + stop_use_reg_mask = (GetPCUseDefEncoding() | this_lir->u.m.use_mask) & ~ENCODE_MEM; } for (check_lir = NEXT_LIR(this_lir); check_lir != tail_lir; check_lir = NEXT_LIR(check_lir)) { @@ -136,11 +135,11 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { * Skip already dead instructions (whose dataflow information is * outdated and misleading). 
*/ - if (check_lir->flags.is_nop || is_pseudo_opcode(check_lir->opcode)) { + if (check_lir->flags.is_nop || IsPseudoLirOp(check_lir->opcode)) { continue; } - uint64_t check_mem_mask = (check_lir->use_mask | check_lir->def_mask) & ENCODE_MEM; + uint64_t check_mem_mask = (check_lir->u.m.use_mask | check_lir->u.m.def_mask) & ENCODE_MEM; uint64_t alias_condition = this_mem_mask & check_mem_mask; bool stop_here = false; @@ -160,7 +159,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { */ DCHECK(!(check_flags & IS_STORE)); /* Same value && same register type */ - if (check_lir->alias_info == this_lir->alias_info && + if (check_lir->flags.alias_info == this_lir->flags.alias_info && SameRegType(check_lir->operands[0], native_reg_id)) { /* * Different destination register - insert @@ -169,11 +168,11 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { if (check_lir->operands[0] != native_reg_id) { ConvertMemOpIntoMove(check_lir, check_lir->operands[0], native_reg_id); } - check_lir->flags.is_nop = true; + NopLIR(check_lir); } } else if (alias_condition == ENCODE_DALVIK_REG) { /* Must alias */ - if (check_lir->alias_info == this_lir->alias_info) { + if (check_lir->flags.alias_info == this_lir->flags.alias_info) { /* Only optimize compatible registers */ bool reg_compatible = SameRegType(check_lir->operands[0], native_reg_id); if ((is_this_lir_load && is_check_lir_load) || @@ -188,7 +187,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { native_reg_id) { ConvertMemOpIntoMove(check_lir, check_lir->operands[0], native_reg_id); } - check_lir->flags.is_nop = true; + NopLIR(check_lir); } else { /* * Destinaions are of different types - @@ -202,7 +201,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { stop_here = true; } else if (!is_this_lir_load && !is_check_lir_load) { /* WAW - nuke the earlier store */ - this_lir->flags.is_nop = true; + NopLIR(this_lir); stop_here = true; } /* Partial overlap */ @@ -257,7 +256,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { * top-down order. 
*/ InsertLIRBefore(check_lir, new_store_lir); - this_lir->flags.is_nop = true; + NopLIR(this_lir); } break; } else if (!check_lir->flags.is_nop) { @@ -286,7 +285,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { /* Start from the second instruction */ for (this_lir = NEXT_LIR(head_lir); this_lir != tail_lir; this_lir = NEXT_LIR(this_lir)) { - if (is_pseudo_opcode(this_lir->opcode)) { + if (IsPseudoLirOp(this_lir->opcode)) { continue; } @@ -298,7 +297,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { continue; } - uint64_t stop_use_all_mask = this_lir->use_mask; + uint64_t stop_use_all_mask = this_lir->u.m.use_mask; if (cu_->instruction_set != kX86) { /* @@ -314,7 +313,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { /* Similar as above, but just check for pure register dependency */ uint64_t stop_use_reg_mask = stop_use_all_mask & ~ENCODE_MEM; - uint64_t stop_def_reg_mask = this_lir->def_mask & ~ENCODE_MEM; + uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM; int next_slot = 0; bool stop_here = false; @@ -329,7 +328,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { continue; } - uint64_t check_mem_mask = check_lir->def_mask & ENCODE_MEM; + uint64_t check_mem_mask = check_lir->u.m.def_mask & ENCODE_MEM; uint64_t alias_condition = stop_use_all_mask & check_mem_mask; stop_here = false; @@ -338,7 +337,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { /* We can fully disambiguate Dalvik references */ if (alias_condition == ENCODE_DALVIK_REG) { /* Must alias or partually overlap */ - if ((check_lir->alias_info == this_lir->alias_info) || + if ((check_lir->flags.alias_info == this_lir->flags.alias_info) || IsDalvikRegisterClobbered(this_lir, check_lir)) { stop_here = true; } @@ -363,7 +362,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { * Store the dependent or non-pseudo/indepedent instruction to the * list. */ - if (stop_here || !is_pseudo_opcode(check_lir->opcode)) { + if (stop_here || !IsPseudoLirOp(check_lir->opcode)) { prev_inst_list[next_slot++] = check_lir; if (next_slot == MAX_HOIST_DISTANCE) { break; @@ -394,7 +393,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { int slot; LIR* dep_lir = prev_inst_list[next_slot-1]; /* If there is ld-ld dependency, wait LDLD_DISTANCE cycles */ - if (!is_pseudo_opcode(dep_lir->opcode) && + if (!IsPseudoLirOp(dep_lir->opcode) && (GetTargetInstFlags(dep_lir->opcode) & IS_LOAD)) { first_slot -= LDLD_DISTANCE; } @@ -407,7 +406,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { LIR* prev_lir = prev_inst_list[slot+1]; /* Check the highest instruction */ - if (prev_lir->def_mask == ENCODE_ALL) { + if (prev_lir->u.m.def_mask == ENCODE_ALL) { /* * If the first instruction is a load, don't hoist anything * above it since it is unlikely to be beneficial. @@ -435,9 +434,9 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { * Try to find two instructions with load/use dependency until * the remaining instructions are less than LD_LATENCY. */ - bool prev_is_load = is_pseudo_opcode(prev_lir->opcode) ? false : + bool prev_is_load = IsPseudoLirOp(prev_lir->opcode) ? 
false : (GetTargetInstFlags(prev_lir->opcode) & IS_LOAD); - if (((cur_lir->use_mask & prev_lir->def_mask) && prev_is_load) || (slot < LD_LATENCY)) { + if (((cur_lir->u.m.use_mask & prev_lir->u.m.def_mask) && prev_is_load) || (slot < LD_LATENCY)) { break; } } @@ -453,7 +452,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { * is never the first LIR on the list */ InsertLIRBefore(cur_lir, new_load_lir); - this_lir->flags.is_nop = true; + NopLIR(this_lir); } } } @@ -468,41 +467,4 @@ void Mir2Lir::ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir) { } } -/* - * Nop any unconditional branches that go to the next instruction. - * Note: new redundant branches may be inserted later, and we'll - * use a check in final instruction assembly to nop those out. - */ -void Mir2Lir::RemoveRedundantBranches() { - LIR* this_lir; - - for (this_lir = first_lir_insn_; this_lir != last_lir_insn_; this_lir = NEXT_LIR(this_lir)) { - /* Branch to the next instruction */ - if (IsUnconditionalBranch(this_lir)) { - LIR* next_lir = this_lir; - - while (true) { - next_lir = NEXT_LIR(next_lir); - - /* - * Is the branch target the next instruction? - */ - if (next_lir == this_lir->target) { - this_lir->flags.is_nop = true; - break; - } - - /* - * Found real useful stuff between the branch and the target. - * Need to explicitly check the last_lir_insn_ here because it - * might be the last real instruction. - */ - if (!is_pseudo_opcode(next_lir->opcode) || - (next_lir == last_lir_insn_)) - break; - } - } - } -} - } // namespace art diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc index cd25232c21..5f5e5e44ac 100644 --- a/compiler/dex/quick/mips/assemble_mips.cc +++ b/compiler/dex/quick/mips/assemble_mips.cc @@ -489,12 +489,12 @@ void MipsMir2Lir::ConvertShortToLongBranch(LIR* lir) { LIR* curr_pc = RawLIR(dalvik_offset, kMipsCurrPC); InsertLIRBefore(lir, curr_pc); LIR* anchor = RawLIR(dalvik_offset, kPseudoTargetLabel); - LIR* delta_hi = RawLIR(dalvik_offset, kMipsDeltaHi, r_AT, 0, - reinterpret_cast<uintptr_t>(anchor), 0, 0, lir->target); + LIR* delta_hi = RawLIR(dalvik_offset, kMipsDeltaHi, r_AT, 0, WrapPointer(anchor), 0, 0, + lir->target); InsertLIRBefore(lir, delta_hi); InsertLIRBefore(lir, anchor); - LIR* delta_lo = RawLIR(dalvik_offset, kMipsDeltaLo, r_AT, 0, - reinterpret_cast<uintptr_t>(anchor), 0, 0, lir->target); + LIR* delta_lo = RawLIR(dalvik_offset, kMipsDeltaLo, r_AT, 0, WrapPointer(anchor), 0, 0, + lir->target); InsertLIRBefore(lir, delta_lo); LIR* addu = RawLIR(dalvik_offset, kMipsAddu, r_AT, r_AT, r_RA); InsertLIRBefore(lir, addu); @@ -503,7 +503,7 @@ void MipsMir2Lir::ConvertShortToLongBranch(LIR* lir) { if (!unconditional) { InsertLIRBefore(lir, hop_target); } - lir->flags.is_nop = true; + NopLIR(lir); } /* @@ -512,7 +512,7 @@ void MipsMir2Lir::ConvertShortToLongBranch(LIR* lir) { * instruction. In those cases we will try to substitute a new code * sequence or request that the trace be shortened and retried. 
*/ -AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { +AssemblerStatus MipsMir2Lir::AssembleInstructions(CodeOffset start_addr) { LIR *lir; AssemblerStatus res = kSuccess; // Assume success @@ -526,7 +526,7 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { continue; } - if (lir->flags.pcRelFixup) { + if (lir->flags.fixup != kFixupNone) { if (lir->opcode == kMipsDelta) { /* * The "Delta" pseudo-ops load the difference between @@ -538,8 +538,8 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { * and is found in lir->target. If operands[3] is non-NULL, * then it is a Switch/Data table. */ - int offset1 = (reinterpret_cast<LIR*>(lir->operands[2]))->offset; - SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); + int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset; + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); int offset2 = tab_rec ? tab_rec->offset : lir->target->offset; int delta = offset2 - offset1; if ((delta & 0xffff) == delta && ((delta & 0x8000) == 0)) { @@ -561,25 +561,25 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { RawLIR(lir->dalvik_offset, kMipsAddu, lir->operands[0], lir->operands[0], r_RA); InsertLIRBefore(lir, new_addu); - lir->flags.is_nop = true; + NopLIR(lir); res = kRetryAll; } } else if (lir->opcode == kMipsDeltaLo) { - int offset1 = (reinterpret_cast<LIR*>(lir->operands[2]))->offset; - SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); + int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset; + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); int offset2 = tab_rec ? tab_rec->offset : lir->target->offset; int delta = offset2 - offset1; lir->operands[1] = delta & 0xffff; } else if (lir->opcode == kMipsDeltaHi) { - int offset1 = (reinterpret_cast<LIR*>(lir->operands[2]))->offset; - SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); + int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset; + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); int offset2 = tab_rec ? 
tab_rec->offset : lir->target->offset; int delta = offset2 - offset1; lir->operands[1] = (delta >> 16) & 0xffff; } else if (lir->opcode == kMipsB || lir->opcode == kMipsBal) { LIR *target_lir = lir->target; - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset; int delta = target - pc; if (delta & 0x3) { LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta; @@ -592,8 +592,8 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { } } else if (lir->opcode >= kMipsBeqz && lir->opcode <= kMipsBnez) { LIR *target_lir = lir->target; - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset; int delta = target - pc; if (delta & 0x3) { LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta; @@ -606,8 +606,8 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { } } else if (lir->opcode == kMipsBeq || lir->opcode == kMipsBne) { LIR *target_lir = lir->target; - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset; int delta = target - pc; if (delta & 0x3) { LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta; @@ -619,8 +619,8 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { lir->operands[2] = delta >> 2; } } else if (lir->opcode == kMipsJal) { - uintptr_t cur_pc = (start_addr + lir->offset + 4) & ~3; - uintptr_t target = lir->operands[0]; + CodeOffset cur_pc = (start_addr + lir->offset + 4) & ~3; + CodeOffset target = lir->operands[0]; /* ensure PC-region branch can be used */ DCHECK_EQ((cur_pc & 0xF0000000), (target & 0xF0000000)); if (target & 0x3) { @@ -629,11 +629,11 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { lir->operands[0] = target >> 2; } else if (lir->opcode == kMipsLahi) { /* ld address hi (via lui) */ LIR *target_lir = lir->target; - uintptr_t target = start_addr + target_lir->offset; + CodeOffset target = start_addr + target_lir->offset; lir->operands[1] = target >> 16; } else if (lir->opcode == kMipsLalo) { /* ld address lo (via ori) */ LIR *target_lir = lir->target; - uintptr_t target = start_addr + target_lir->offset; + CodeOffset target = start_addr + target_lir->offset; lir->operands[2] = lir->operands[2] + target; } } @@ -646,6 +646,7 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { if (res != kSuccess) { continue; } + DCHECK(!IsPseudoLirOp(lir->opcode)); const MipsEncodingMap *encoder = &EncodingMap[lir->opcode]; uint32_t bits = encoder->skeleton; int i; @@ -695,6 +696,7 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { code_buffer_.push_back((bits >> 24) & 0xff); // TUNING: replace with proper delay slot handling if (encoder->size == 8) { + DCHECK(!IsPseudoLirOp(lir->opcode)); const MipsEncodingMap *encoder = &EncodingMap[kMipsNop]; uint32_t bits = encoder->skeleton; code_buffer_.push_back(bits & 0xff); @@ -707,7 +709,105 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { } int MipsMir2Lir::GetInsnSize(LIR* lir) { + DCHECK(!IsPseudoLirOp(lir->opcode)); return EncodingMap[lir->opcode].size; } +// LIR offset assignment. +// TODO: consolidate w/ Arm assembly mechanism. 
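The offset-assignment and assembly driver added below for MIPS mirror the Arm back end's optimistic strategy: give every LIR a byte offset, try to encode, and if a short branch turns out not to reach its target, substitute a longer sequence and redo the offsets. A minimal, self-contained sketch of that retry loop, using illustrative stand-in types rather than the real LIR/EncodingMap machinery:

#include <cstdint>
#include <vector>

// Illustrative stand-ins, not the ART types.
struct Insn {
  uint32_t offset = 0;  // Byte offset, assigned before each encoding pass.
  uint32_t size = 4;    // Encoded size in bytes; grows when a retry expands it.
  int target = -1;      // Index of the branch target instruction, or -1.
};

// Give every instruction a byte offset and return the total code size.
static uint32_t AssignOffsets(std::vector<Insn>& insns) {
  uint32_t offset = 0;
  for (Insn& insn : insns) {
    insn.offset = offset;
    offset += insn.size;
  }
  return offset;
}

// Optimistic assembly: encode with short branches first, and retry whenever a
// displacement turns out not to fit, since expanding one instruction shifts
// every offset after it.
static void Assemble(std::vector<Insn>& insns) {
  bool retry = true;
  while (retry) {
    retry = false;
    AssignOffsets(insns);
    for (Insn& insn : insns) {
      if (insn.target < 0) continue;
      int delta = static_cast<int>(insns[insn.target].offset) -
                  static_cast<int>(insn.offset + 4);
      if ((delta < -32768 || delta > 32767) && insn.size == 4) {  // Range is illustrative.
        insn.size = 12;  // Pretend we substituted a long-branch sequence.
        retry = true;    // Offsets changed, so redo the pass.
      }
    }
  }
}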
+int MipsMir2Lir::AssignInsnOffsets() { + LIR* lir; + int offset = 0; + + for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { + lir->offset = offset; + if (LIKELY(lir->opcode >= 0)) { + if (!lir->flags.is_nop) { + offset += lir->flags.size; + } + } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) { + if (offset & 0x2) { + offset += 2; + lir->operands[0] = 1; + } else { + lir->operands[0] = 0; + } + } + /* Pseudo opcodes don't consume space */ + } + return offset; +} + +/* + * Walk the compilation unit and assign offsets to instructions + * and literals and compute the total size of the compiled unit. + * TODO: consolidate w/ Arm assembly mechanism. + */ +void MipsMir2Lir::AssignOffsets() { + int offset = AssignInsnOffsets(); + + /* Const values have to be word aligned */ + offset = (offset + 3) & ~3; + + /* Set up offsets for literals */ + data_offset_ = offset; + + offset = AssignLiteralOffset(offset); + + offset = AssignSwitchTablesOffset(offset); + + offset = AssignFillArrayDataOffset(offset); + + total_size_ = offset; +} + +/* + * Go over each instruction in the list and calculate the offset from the top + * before sending them off to the assembler. If out-of-range branch distance is + * seen rearrange the instructions a bit to correct it. + * TODO: consolidate w/ Arm assembly mechanism. + */ +void MipsMir2Lir::AssembleLIR() { + cu_->NewTimingSplit("Assemble"); + AssignOffsets(); + int assembler_retries = 0; + /* + * Assemble here. Note that we generate code with optimistic assumptions + * and if found now to work, we'll have to redo the sequence and retry. + */ + + while (true) { + AssemblerStatus res = AssembleInstructions(0); + if (res == kSuccess) { + break; + } else { + assembler_retries++; + if (assembler_retries > MAX_ASSEMBLER_RETRIES) { + CodegenDump(); + LOG(FATAL) << "Assembler error - too many retries"; + } + // Redo offsets and try again + AssignOffsets(); + code_buffer_.clear(); + } + } + + // Install literals + cu_->NewTimingSplit("LiteralData"); + InstallLiteralPools(); + + // Install switch tables + InstallSwitchTables(); + + // Install fill array data + InstallFillArrayData(); + + // Create the mapping table and native offset to reference map. 
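For context on the two calls that follow: the mapping table pairs native code offsets with the dex pcs they were generated from, so the runtime can walk from a native PC back to bytecode, and the native GC map records which references are live at each safepoint. A rough, illustrative picture of the per-instruction record being accumulated; the field names are made up, not the ART encoding:

#include <cstdint>
#include <vector>

// Illustrative only; the real tables are emitted in a packed format.
struct PcToDexEntry {
  uint32_t native_offset;  // Offset of the instruction in the compiled code.
  uint32_t dex_pc;         // Dalvik offset (in 16-bit code units) it came from.
};

std::vector<PcToDexEntry> pc2dex_map;  // Filled in while walking the LIR list.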
+ cu_->NewTimingSplit("PcMappingTable"); + CreateMappingTables(); + + cu_->NewTimingSplit("GcMap"); + CreateNativeGcMap(); +} + } // namespace art diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index d53c012466..18c8cf87f2 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -59,14 +59,14 @@ void MipsMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, * done: * */ -void MipsMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, +void MipsMir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpSparseSwitchTable(table); } // Add the table to the list - we'll process it later - SwitchTable *tab_rec = + SwitchTable* tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; @@ -101,8 +101,7 @@ void MipsMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, // Remember base label so offsets can be computed later tab_rec->anchor = base_label; int rBase = AllocTemp(); - NewLIR4(kMipsDelta, rBase, 0, reinterpret_cast<uintptr_t>(base_label), - reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR4(kMipsDelta, rBase, 0, WrapPointer(base_label), WrapPointer(tab_rec)); OpRegRegReg(kOpAdd, rEnd, rEnd, rBase); // Grab switch test value @@ -138,20 +137,20 @@ void MipsMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, * jr r_RA * done: */ -void MipsMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, +void MipsMir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpPackedSwitchTable(table); } // Add the table to the list - we'll process it later - SwitchTable *tab_rec = + SwitchTable* tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; int size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), - ArenaAllocator::kAllocLIR)); + ArenaAllocator::kAllocLIR)); switch_tables_.Insert(tab_rec); // Get the switch value @@ -196,8 +195,7 @@ void MipsMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, // Materialize the table base pointer int rBase = AllocTemp(); - NewLIR4(kMipsDelta, rBase, 0, reinterpret_cast<uintptr_t>(base_label), - reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR4(kMipsDelta, rBase, 0, WrapPointer(base_label), WrapPointer(tab_rec)); // Load the displacement from the switch table int r_disp = AllocTemp(); @@ -222,10 +220,10 @@ void MipsMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, * * Total size is 4+(width * size + 1)/2 16-bit code units. 
*/ -void MipsMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { +void MipsMir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; // Add the table to the list - we'll process it later - FillArrayData *tab_rec = + FillArrayData* tab_rec = reinterpret_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), ArenaAllocator::kAllocData)); tab_rec->table = table; @@ -252,8 +250,7 @@ void MipsMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { LIR* base_label = NewLIR0(kPseudoTargetLabel); // Materialize a pointer to the fill data image - NewLIR4(kMipsDelta, rMIPS_ARG1, 0, reinterpret_cast<uintptr_t>(base_label), - reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR4(kMipsDelta, rMIPS_ARG1, 0, WrapPointer(base_label), WrapPointer(tab_rec)); // And go... ClobberCalleeSave(); @@ -261,36 +258,6 @@ void MipsMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { MarkSafepointPC(call_inst); } -/* - * TODO: implement fast path to short-circuit thin-lock case - */ -void MipsMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { - FlushAllRegs(); - LoadValueDirectFixed(rl_src, rMIPS_ARG0); // Get obj - LockCallTemps(); // Prepare for explicit register usage - GenNullCheck(rl_src.s_reg_low, rMIPS_ARG0, opt_flags); - // Go expensive route - artLockObjectFromCode(self, obj); - int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pLockObject)); - ClobberCalleeSave(); - LIR* call_inst = OpReg(kOpBlx, r_tgt); - MarkSafepointPC(call_inst); -} - -/* - * TODO: implement fast path to short-circuit thin-lock case - */ -void MipsMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { - FlushAllRegs(); - LoadValueDirectFixed(rl_src, rMIPS_ARG0); // Get obj - LockCallTemps(); // Prepare for explicit register usage - GenNullCheck(rl_src.s_reg_low, rMIPS_ARG0, opt_flags); - // Go expensive route - UnlockObjectFromCode(obj); - int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pUnlockObject)); - ClobberCalleeSave(); - LIR* call_inst = OpReg(kOpBlx, r_tgt); - MarkSafepointPC(call_inst); -} - void MipsMir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Thread::ExceptionOffset().Int32Value(); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); @@ -318,6 +285,7 @@ void MipsMir2Lir::MarkGCCard(int val_reg, int tgt_addr_reg) { FreeTemp(reg_card_base); FreeTemp(reg_card_no); } + void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { int spill_count = num_core_spills_ + num_fp_spills_; /* diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index b9cb720962..88b244ba90 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -52,7 +52,6 @@ class MipsMir2Lir : public Mir2Lir { int AllocTypedTempPair(bool fp_hint, int reg_class); int S2d(int low_reg, int high_reg); int TargetReg(SpecialTargetRegister reg); - RegisterInfo* GetRegInfo(int reg); RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); @@ -72,9 +71,12 @@ class MipsMir2Lir : public Mir2Lir { void CompilerInitializeRegAlloc(); // Required for target - miscellaneous. 
- AssemblerStatus AssembleInstructions(uintptr_t start_addr); + void AssembleLIR(); + int AssignInsnOffsets(); + void AssignOffsets(); + AssemblerStatus AssembleInstructions(CodeOffset start_addr); void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir); + void SetupTargetResourceMasks(LIR* lir, uint64_t flags); const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); @@ -86,12 +88,10 @@ class MipsMir2Lir : public Mir2Lir { // Required for target - Dalvik-level generators. void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); - void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index, - RegLocation rl_src, int scale); void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_dest, int scale); + RegLocation rl_index, RegLocation rl_dest, int scale); void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale); + RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark); void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift); void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -107,6 +107,8 @@ class MipsMir2Lir : public Mir2Lir { bool GenInlinedCas32(CallInfo* info, bool need_write_barrier); bool GenInlinedMinMaxInt(CallInfo* info, bool is_min); bool GenInlinedSqrt(CallInfo* info); + bool GenInlinedPeek(CallInfo* info, OpSize size); + bool GenInlinedPoke(CallInfo* info, OpSize size); void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -124,8 +126,6 @@ class MipsMir2Lir : public Mir2Lir { void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); void GenSelect(BasicBlock* bb, MIR* mir); void GenMemBarrier(MemBarrierKind barrier_kind); - void GenMonitorEnter(int opt_flags, RegLocation rl_src); - void GenMonitorExit(int opt_flags, RegLocation rl_src); void GenMoveException(RegLocation rl_dest); void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, int first_bit, int second_bit); diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc index 6ce5750a5f..52294290c9 100644 --- a/compiler/dex/quick/mips/int_mips.cc +++ b/compiler/dex/quick/mips/int_mips.cc @@ -268,6 +268,37 @@ bool MipsMir2Lir::GenInlinedSqrt(CallInfo* info) { return false; } +bool MipsMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { + if (size != kSignedByte) { + // MIPS supports only aligned access. Defer unaligned access to JNI implementation. + return false; + } + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address.wide = 0; // ignore high half in info->args[1] + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + DCHECK(size == kSignedByte); + LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, size, INVALID_SREG); + StoreValue(rl_dest, rl_result); + return true; +} + +bool MipsMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { + if (size != kSignedByte) { + // MIPS supports only aligned access. 
Defer unaligned access to JNI implementation. + return false; + } + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address.wide = 0; // ignore high half in info->args[1] + RegLocation rl_src_value = info->args[2]; // [size] value + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + DCHECK(size == kSignedByte); + RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); + StoreBaseDisp(rl_address.low_reg, 0, rl_value.low_reg, size); + return true; +} + LIR* MipsMir2Lir::OpPcRelLoad(int reg, LIR* target) { LOG(FATAL) << "Unexpected use of OpPcRelLoad for Mips"; return NULL; @@ -484,7 +515,7 @@ void MipsMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, * */ void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) { + RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { RegisterClass reg_class = oat_reg_class_by_size(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); int data_offset; @@ -498,12 +529,14 @@ void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, rl_array = LoadValue(rl_array, kCoreReg); rl_index = LoadValue(rl_index, kCoreReg); int reg_ptr = INVALID_REG; - if (IsTemp(rl_array.low_reg)) { + bool allocated_reg_ptr_temp = false; + if (IsTemp(rl_array.low_reg) && !card_mark) { Clobber(rl_array.low_reg); reg_ptr = rl_array.low_reg; } else { reg_ptr = AllocTemp(); OpRegCopy(reg_ptr, rl_array.low_reg); + allocated_reg_ptr_temp = true; } /* null object? */ @@ -538,8 +571,6 @@ void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, } StoreBaseDispWide(reg_ptr, 0, rl_src.low_reg, rl_src.high_reg); - - FreeTemp(reg_ptr); } else { rl_src = LoadValue(rl_src, reg_class); if (needs_range_check) { @@ -549,65 +580,11 @@ void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, StoreBaseIndexed(reg_ptr, rl_index.low_reg, rl_src.low_reg, scale, size); } -} - -/* - * Generate array store - * - */ -void MipsMir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) { - int len_offset = mirror::Array::LengthOffset().Int32Value(); - int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(); - - FlushAllRegs(); // Use explicit registers - LockCallTemps(); - - int r_value = TargetReg(kArg0); // Register holding value - int r_array_class = TargetReg(kArg1); // Register holding array's Class - int r_array = TargetReg(kArg2); // Register holding array - int r_index = TargetReg(kArg3); // Register holding index into array - - LoadValueDirectFixed(rl_array, r_array); // Grab array - LoadValueDirectFixed(rl_src, r_value); // Grab value - LoadValueDirectFixed(rl_index, r_index); // Grab index - - GenNullCheck(rl_array.s_reg_low, r_array, opt_flags); // NPE? - - // Store of null? - LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL); - - // Get the array's class. - LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class); - CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCanPutArrayElement), r_value, - r_array_class, true); - // Redo LoadValues in case they didn't survive the call. 
- LoadValueDirectFixed(rl_array, r_array); // Reload array - LoadValueDirectFixed(rl_index, r_index); // Reload index - LoadValueDirectFixed(rl_src, r_value); // Reload value - r_array_class = INVALID_REG; - - // Branch here if value to be stored == null - LIR* target = NewLIR0(kPseudoTargetLabel); - null_value_check->target = target; - - bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); - int reg_len = INVALID_REG; - if (needs_range_check) { - reg_len = TargetReg(kArg1); - LoadWordDisp(r_array, len_offset, reg_len); // Get len - } - /* r_ptr -> array data */ - int r_ptr = AllocTemp(); - OpRegRegImm(kOpAdd, r_ptr, r_array, data_offset); - if (needs_range_check) { - GenRegRegCheck(kCondCs, r_index, reg_len, kThrowArrayBounds); + if (allocated_reg_ptr_temp) { + FreeTemp(reg_ptr); } - StoreBaseIndexed(r_ptr, r_index, r_value, scale, kWord); - FreeTemp(r_ptr); - FreeTemp(r_index); - if (!mir_graph_->IsConstantNullRef(rl_src)) { - MarkGCCard(r_value, r_array); + if (card_mark) { + MarkGCCard(rl_src.low_reg, rl_array.low_reg); } } diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index 4ee5b23eb9..9c598e6bee 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -76,6 +76,8 @@ int MipsMir2Lir::TargetReg(SpecialTargetRegister reg) { case kRet0: res = rMIPS_RET0; break; case kRet1: res = rMIPS_RET1; break; case kInvokeTgt: res = rMIPS_INVOKE_TGT; break; + case kHiddenArg: res = r_T0; break; + case kHiddenFpArg: res = INVALID_REG; break; case kCount: res = rMIPS_COUNT; break; } return res; @@ -120,22 +122,21 @@ uint64_t MipsMir2Lir::GetPCUseDefEncoding() { } -void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir) { +void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { DCHECK_EQ(cu_->instruction_set, kMips); + DCHECK(!lir->flags.use_def_invalid); // Mips-specific resource map setup here. - uint64_t flags = MipsMir2Lir::EncodingMap[lir->opcode].flags; - if (flags & REG_DEF_SP) { - lir->def_mask |= ENCODE_MIPS_REG_SP; + lir->u.m.def_mask |= ENCODE_MIPS_REG_SP; } if (flags & REG_USE_SP) { - lir->use_mask |= ENCODE_MIPS_REG_SP; + lir->u.m.use_mask |= ENCODE_MIPS_REG_SP; } if (flags & REG_DEF_LR) { - lir->def_mask |= ENCODE_MIPS_REG_LR; + lir->u.m.def_mask |= ENCODE_MIPS_REG_LR; } } @@ -269,8 +270,8 @@ void MipsMir2Lir::DumpResourceMask(LIR *mips_lir, uint64_t mask, const char *pre } /* Memory bits */ if (mips_lir && (mask & ENCODE_DALVIK_REG)) { - sprintf(buf + strlen(buf), "dr%d%s", mips_lir->alias_info & 0xffff, - (mips_lir->alias_info & 0x80000000) ? "(+1)" : ""); + sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(mips_lir->flags.alias_info), + DECODE_ALIAS_INFO_WIDE(mips_lir->flags.alias_info) ? "(+1)" : ""); } if (mask & ENCODE_LITERAL) { strcat(buf, "lit "); @@ -399,11 +400,6 @@ RegLocation MipsMir2Lir::GetReturnAlt() { return res; } -MipsMir2Lir::RegisterInfo* MipsMir2Lir::GetRegInfo(int reg) { - return MIPS_FPREG(reg) ? 
&reg_pool_->FPRegs[reg & MIPS_FP_REG_MASK] - : &reg_pool_->core_regs[reg]; -} - /* To be used when explicitly managing register use */ void MipsMir2Lir::LockCallTemps() { LockTemp(rMIPS_ARG0); @@ -559,14 +555,17 @@ Mir2Lir* MipsCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, } uint64_t MipsMir2Lir::GetTargetInstFlags(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return MipsMir2Lir::EncodingMap[opcode].flags; } const char* MipsMir2Lir::GetTargetInstName(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return MipsMir2Lir::EncodingMap[opcode].name; } const char* MipsMir2Lir::GetTargetInstFmt(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return MipsMir2Lir::EncodingMap[opcode].fmt; } diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc index 5d9ae33921..2ba2c8487d 100644 --- a/compiler/dex/quick/mips/utility_mips.cc +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -93,7 +93,7 @@ LIR* MipsMir2Lir::LoadConstantNoClobber(int r_dest, int value) { } else if ((value < 0) && (value >= -32768)) { res = NewLIR3(kMipsAddiu, r_dest, r_ZERO, value); } else { - res = NewLIR2(kMipsLui, r_dest, value>>16); + res = NewLIR2(kMipsLui, r_dest, value >> 16); if (value & 0xffff) NewLIR3(kMipsOri, r_dest, r_dest, value); } diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h index 440df2afa6..1a30b7aef0 100644 --- a/compiler/dex/quick/mir_to_lir-inl.h +++ b/compiler/dex/quick/mir_to_lir-inl.h @@ -33,12 +33,17 @@ inline void Mir2Lir::ClobberBody(RegisterInfo* p) { p->def_end = NULL; if (p->pair) { p->pair = false; - Clobber(p->partner); + p = GetRegInfo(p->partner); + p->pair = false; + p->live = false; + p->s_reg = INVALID_SREG; + p->def_start = NULL; + p->def_end = NULL; } } } -inline LIR* Mir2Lir::RawLIR(int dalvik_offset, int opcode, int op0, +inline LIR* Mir2Lir::RawLIR(DexOffset dalvik_offset, int opcode, int op0, int op1, int op2, int op3, int op4, LIR* target) { LIR* insn = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR)); insn->dalvik_offset = dalvik_offset; @@ -53,7 +58,8 @@ inline LIR* Mir2Lir::RawLIR(int dalvik_offset, int opcode, int op0, if ((opcode == kPseudoTargetLabel) || (opcode == kPseudoSafepointPC) || (opcode == kPseudoExportedPC)) { // Always make labels scheduling barriers - insn->use_mask = insn->def_mask = ENCODE_ALL; + DCHECK(!insn->flags.use_def_invalid); + insn->u.m.use_mask = insn->u.m.def_mask = ENCODE_ALL; } return insn; } @@ -63,7 +69,7 @@ inline LIR* Mir2Lir::RawLIR(int dalvik_offset, int opcode, int op0, * operands.
*/ inline LIR* Mir2Lir::NewLIR0(int opcode) { - DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & NO_OPERAND)) + DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & NO_OPERAND)) << GetTargetInstName(opcode) << " " << opcode << " " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " << current_dalvik_offset_; @@ -73,7 +79,7 @@ inline LIR* Mir2Lir::NewLIR0(int opcode) { } inline LIR* Mir2Lir::NewLIR1(int opcode, int dest) { - DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_UNARY_OP)) + DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_UNARY_OP)) << GetTargetInstName(opcode) << " " << opcode << " " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " << current_dalvik_offset_; @@ -83,7 +89,7 @@ inline LIR* Mir2Lir::NewLIR1(int opcode, int dest) { } inline LIR* Mir2Lir::NewLIR2(int opcode, int dest, int src1) { - DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_BINARY_OP)) + DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_BINARY_OP)) << GetTargetInstName(opcode) << " " << opcode << " " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " << current_dalvik_offset_; @@ -93,7 +99,7 @@ inline LIR* Mir2Lir::NewLIR2(int opcode, int dest, int src1) { } inline LIR* Mir2Lir::NewLIR3(int opcode, int dest, int src1, int src2) { - DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_TERTIARY_OP)) + DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_TERTIARY_OP)) << GetTargetInstName(opcode) << " " << opcode << " " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " << current_dalvik_offset_; @@ -103,7 +109,7 @@ inline LIR* Mir2Lir::NewLIR3(int opcode, int dest, int src1, int src2) { } inline LIR* Mir2Lir::NewLIR4(int opcode, int dest, int src1, int src2, int info) { - DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_QUAD_OP)) + DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_QUAD_OP)) << GetTargetInstName(opcode) << " " << opcode << " " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " << current_dalvik_offset_; @@ -114,7 +120,7 @@ inline LIR* Mir2Lir::NewLIR4(int opcode, int dest, int src1, int src2, int info) inline LIR* Mir2Lir::NewLIR5(int opcode, int dest, int src1, int src2, int info1, int info2) { - DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_QUIN_OP)) + DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_QUIN_OP)) << GetTargetInstName(opcode) << " " << opcode << " " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " << current_dalvik_offset_; @@ -136,20 +142,23 @@ inline void Mir2Lir::SetupRegMask(uint64_t* mask, int reg) { inline void Mir2Lir::SetupResourceMasks(LIR* lir) { int opcode = lir->opcode; - if (opcode <= 0) { - lir->use_mask = lir->def_mask = 0; + if (IsPseudoLirOp(opcode)) { + if (opcode != kPseudoBarrier) { + lir->flags.fixup = kFixupLabel; + } return; } uint64_t flags = GetTargetInstFlags(opcode); if (flags & NEEDS_FIXUP) { - lir->flags.pcRelFixup = true; + // Note: target-specific setup may specialize the fixup kind. + lir->flags.fixup = kFixupLabel; } /* Get the starting size of the instruction's template */ lir->flags.size = GetInsnSize(lir); - + estimated_native_code_size_ += lir->flags.size; /* Set up the mask for resources that are updated */ if (flags & (IS_LOAD | IS_STORE)) { /* Default to heap - will catch specialized classes later */ @@ -161,39 +170,49 @@ inline void Mir2Lir::SetupResourceMasks(LIR* lir) { * turn will trash everything. 
*/ if (flags & IS_BRANCH) { - lir->def_mask = lir->use_mask = ENCODE_ALL; + lir->u.m.def_mask = lir->u.m.use_mask = ENCODE_ALL; return; } if (flags & REG_DEF0) { - SetupRegMask(&lir->def_mask, lir->operands[0]); + SetupRegMask(&lir->u.m.def_mask, lir->operands[0]); } if (flags & REG_DEF1) { - SetupRegMask(&lir->def_mask, lir->operands[1]); + SetupRegMask(&lir->u.m.def_mask, lir->operands[1]); } + if (flags & REG_USE0) { + SetupRegMask(&lir->u.m.use_mask, lir->operands[0]); + } - if (flags & SETS_CCODES) { - lir->def_mask |= ENCODE_CCODE; + if (flags & REG_USE1) { + SetupRegMask(&lir->u.m.use_mask, lir->operands[1]); + } + + if (flags & REG_USE2) { + SetupRegMask(&lir->u.m.use_mask, lir->operands[2]); } - if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) { - int i; + if (flags & REG_USE3) { + SetupRegMask(&lir->u.m.use_mask, lir->operands[3]); + } - for (i = 0; i < 4; i++) { - if (flags & (1 << (kRegUse0 + i))) { - SetupRegMask(&lir->use_mask, lir->operands[i]); - } - } + if (flags & SETS_CCODES) { + lir->u.m.def_mask |= ENCODE_CCODE; } if (flags & USES_CCODES) { - lir->use_mask |= ENCODE_CCODE; + lir->u.m.use_mask |= ENCODE_CCODE; } // Handle target-specific actions - SetupTargetResourceMasks(lir); + SetupTargetResourceMasks(lir, flags); +} + +inline art::Mir2Lir::RegisterInfo* Mir2Lir::GetRegInfo(int reg) { + DCHECK(reginfo_map_.Get(reg) != NULL); + return reginfo_map_.Get(reg); } } // namespace art diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index c41feb1348..fa9a3ad566 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -18,6 +18,7 @@ #include "dex/dataflow_iterator-inl.h" #include "mir_to_lir-inl.h" #include "object_utils.h" +#include "thread-inl.h" namespace art { @@ -240,9 +241,9 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::GOTO_16: case Instruction::GOTO_32: if (mir_graph_->IsBackedge(bb, bb->taken)) { - GenSuspendTestAndBranch(opt_flags, &label_list[bb->taken->id]); + GenSuspendTestAndBranch(opt_flags, &label_list[bb->taken]); } else { - OpUnconditionalBranch(&label_list[bb->taken->id]); + OpUnconditionalBranch(&label_list[bb->taken]); } break; @@ -271,23 +272,22 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::IF_GE: case Instruction::IF_GT: case Instruction::IF_LE: { - LIR* taken = &label_list[bb->taken->id]; - LIR* fall_through = &label_list[bb->fall_through->id]; + LIR* taken = &label_list[bb->taken]; + LIR* fall_through = &label_list[bb->fall_through]; // Result known at compile time? if (rl_src[0].is_const && rl_src[1].is_const) { bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg), mir_graph_->ConstantValue(rl_src[1].orig_sreg)); - BasicBlock* target = is_taken ? bb->taken : bb->fall_through; - if (mir_graph_->IsBackedge(bb, target)) { + BasicBlockId target_id = is_taken ? 
bb->taken : bb->fall_through; + if (mir_graph_->IsBackedge(bb, target_id)) { GenSuspendTest(opt_flags); } - OpUnconditionalBranch(&label_list[target->id]); + OpUnconditionalBranch(&label_list[target_id]); } else { if (mir_graph_->IsBackwardsBranch(bb)) { GenSuspendTest(opt_flags); } - GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken, - fall_through); + GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken, fall_through); } break; } @@ -298,16 +298,16 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::IF_GEZ: case Instruction::IF_GTZ: case Instruction::IF_LEZ: { - LIR* taken = &label_list[bb->taken->id]; - LIR* fall_through = &label_list[bb->fall_through->id]; + LIR* taken = &label_list[bb->taken]; + LIR* fall_through = &label_list[bb->fall_through]; // Result known at compile time? if (rl_src[0].is_const) { bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg), 0); - BasicBlock* target = is_taken ? bb->taken : bb->fall_through; - if (mir_graph_->IsBackedge(bb, target)) { + BasicBlockId target_id = is_taken ? bb->taken : bb->fall_through; + if (mir_graph_->IsBackedge(bb, target_id)) { GenSuspendTest(opt_flags); } - OpUnconditionalBranch(&label_list[target->id]); + OpUnconditionalBranch(&label_list[target_id]); } else { if (mir_graph_->IsBackwardsBranch(bb)) { GenSuspendTest(opt_flags); @@ -337,22 +337,35 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list GenArrayGet(opt_flags, kSignedHalf, rl_src[0], rl_src[1], rl_dest, 1); break; case Instruction::APUT_WIDE: - GenArrayPut(opt_flags, kLong, rl_src[1], rl_src[2], rl_src[0], 3); + GenArrayPut(opt_flags, kLong, rl_src[1], rl_src[2], rl_src[0], 3, false); break; case Instruction::APUT: - GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2); - break; - case Instruction::APUT_OBJECT: - GenArrayObjPut(opt_flags, rl_src[1], rl_src[2], rl_src[0], 2); + GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2, false); + break; + case Instruction::APUT_OBJECT: { + bool is_null = mir_graph_->IsConstantNullRef(rl_src[0]); + bool is_safe = is_null; // Always safe to store null. + if (!is_safe) { + // Check safety from verifier type information. + const MethodReference mr(cu_->dex_file, cu_->method_idx); + is_safe = cu_->compiler_driver->IsSafeCast(mr, mir->offset); + } + if (is_null || is_safe) { + // Store of constant null doesn't require an assignability test and can be generated inline + // without fixed register usage or a card mark. + GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2, !is_null); + } else { + GenArrayObjPut(opt_flags, rl_src[1], rl_src[2], rl_src[0]); + } break; + } case Instruction::APUT_SHORT: case Instruction::APUT_CHAR: - GenArrayPut(opt_flags, kUnsignedHalf, rl_src[1], rl_src[2], rl_src[0], 1); + GenArrayPut(opt_flags, kUnsignedHalf, rl_src[1], rl_src[2], rl_src[0], 1, false); break; case Instruction::APUT_BYTE: case Instruction::APUT_BOOLEAN: - GenArrayPut(opt_flags, kUnsignedByte, rl_src[1], rl_src[2], - rl_src[0], 0); + GenArrayPut(opt_flags, kUnsignedByte, rl_src[1], rl_src[2], rl_src[0], 0, false); break; case Instruction::IGET_OBJECT: @@ -696,6 +709,7 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { // Insert the block label. 
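A note on the label handling in the hunks around here: taken and fall_through successors are now plain BasicBlockId values, so a branch target is found by indexing a label array allocated once per method (block_label_list_) instead of dereferencing a BasicBlock pointer. A small sketch of that pattern, with stand-in types:

#include <cstdint>
#include <vector>

typedef uint16_t BasicBlockId;

struct Label  { uint32_t offset = 0; };      // Stand-in for a block-label LIR node.
struct Branch { Label* target = nullptr; };  // Stand-in for a branch LIR node.

struct BlockLabels {
  // One label per basic block, indexed by id and allocated up front.
  std::vector<Label> labels;

  explicit BlockLabels(size_t num_blocks) : labels(num_blocks) {}

  // A branch simply points at the successor's pre-allocated label; the label
  // itself is emitted when that block's code is generated.
  Branch BranchTo(BasicBlockId taken_id) {
    Branch b;
    b.target = &labels[taken_id];  // Index lookup; no BasicBlock pointer needed.
    return b;
  }
};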
block_label_list_[block_id].opcode = kPseudoNormalBlockLabel; + block_label_list_[block_id].flags.fixup = kFixupLabel; AppendLIR(&block_label_list_[block_id]); LIR* head_lir = NULL; @@ -706,16 +720,15 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { } // Free temp registers and reset redundant store tracking. - ResetRegPool(); - ResetDefTracking(); - ClobberAllRegs(); if (bb->block_type == kEntryBlock) { + ResetRegPool(); int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; GenEntrySequence(&mir_graph_->reg_location_[start_vreg], mir_graph_->reg_location_[mir_graph_->GetMethodSReg()]); } else if (bb->block_type == kExitBlock) { + ResetRegPool(); GenExitSequence(); } @@ -736,17 +749,18 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { current_dalvik_offset_ = mir->offset; int opcode = mir->dalvikInsn.opcode; - LIR* boundary_lir; // Mark the beginning of a Dalvik instruction for line tracking. - char* inst_str = cu_->verbose ? - mir_graph_->GetDalvikDisassembly(mir) : NULL; - boundary_lir = MarkBoundary(mir->offset, inst_str); + if (cu_->verbose) { + char* inst_str = mir_graph_->GetDalvikDisassembly(mir); + MarkBoundary(mir->offset, inst_str); + } // Remember the first LIR for this block. if (head_lir == NULL) { - head_lir = boundary_lir; - // Set the first boundary_lir as a scheduling barrier. - head_lir->def_mask = ENCODE_ALL; + head_lir = &block_label_list_[bb->id]; + // Set the first label as a scheduling barrier. + DCHECK(!head_lir->flags.use_def_invalid); + head_lir->u.m.def_mask = ENCODE_ALL; } if (opcode == kMirOpCheck) { @@ -771,11 +785,6 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { if (head_lir) { // Eliminate redundant loads/stores and delay stores into later slots. ApplyLocalOptimizations(head_lir, last_lir_insn_); - - // Generate an unconditional branch to the fallthrough block. - if (bb->fall_through) { - OpUnconditionalBranch(&block_label_list_[bb->fall_through->id]); - } } return false; } @@ -810,25 +819,34 @@ void Mir2Lir::SpecialMIR2LIR(SpecialCaseHandler special_case) { } void Mir2Lir::MethodMIR2LIR() { + cu_->NewTimingSplit("MIR2LIR"); + // Hold the labels of each block. block_label_list_ = static_cast<LIR*>(arena_->Alloc(sizeof(LIR) * mir_graph_->GetNumBlocks(), ArenaAllocator::kAllocLIR)); - PreOrderDfsIterator iter(mir_graph_, false /* not iterative */); - for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { - MethodBlockCodeGen(bb); + PreOrderDfsIterator iter(mir_graph_); + BasicBlock* curr_bb = iter.Next(); + BasicBlock* next_bb = iter.Next(); + while (curr_bb != NULL) { + MethodBlockCodeGen(curr_bb); + // If the fall_through block is no longer laid out consecutively, drop in a branch. 
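The comment above describes the replacement for the deleted RemoveRedundantBranches() pass: instead of always branching to fall_through and nop'ing redundant branches afterwards, the generator now emits an unconditional branch only when the fall-through successor is not the next block laid out. A simplified sketch of that rule, with hypothetical helper names:

#include <vector>

struct Block {
  int id;
  int fall_through;  // Successor id, or -1 if none.
};

// Walk the blocks in emission order; dead blocks are assumed to have been
// filtered out already, as the loop in MethodMIR2LIR() does.
void EmitBlocks(const std::vector<Block>& layout_order) {
  for (size_t i = 0; i < layout_order.size(); ++i) {
    const Block& bb = layout_order[i];
    // ... generate this block's code ...
    bool next_is_fall_through =
        (i + 1) < layout_order.size() && layout_order[i + 1].id == bb.fall_through;
    if (bb.fall_through >= 0 && !next_is_fall_through) {
      // The successor is not laid out right after this block, so an explicit
      // unconditional branch is required; otherwise we simply fall through.
      // EmitUnconditionalBranchTo(bb.fall_through);  // hypothetical helper
    }
  }
}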
+ BasicBlock* curr_bb_fall_through = mir_graph_->GetBasicBlock(curr_bb->fall_through); + if ((curr_bb_fall_through != NULL) && (curr_bb_fall_through != next_bb)) { + OpUnconditionalBranch(&block_label_list_[curr_bb->fall_through]); + } + curr_bb = next_bb; + do { + next_bb = iter.Next(); + } while ((next_bb != NULL) && (next_bb->block_type == kDead)); } - + cu_->NewTimingSplit("Launchpads"); HandleSuspendLaunchPads(); HandleThrowLaunchPads(); HandleIntrinsicLaunchPads(); - - if (!(cu_->disable_opt & (1 << kSafeOptimizations))) { - RemoveRedundantBranches(); - } } } // namespace art diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index a37ebd173f..4c56b74dc4 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -30,6 +30,14 @@ namespace art { +/* + * TODO: refactoring pass to move these (and other) typdefs towards usage style of runtime to + * add type safety (see runtime/offsets.h). + */ +typedef uint32_t DexOffset; // Dex offset in code units. +typedef uint16_t NarrowDexOffset; // For use in structs, Dex offsets range from 0 .. 0xffff. +typedef uint32_t CodeOffset; // Native code offset in bytes. + // Set to 1 to measure cost of suspend check. #define NO_SUSPEND 0 @@ -95,6 +103,7 @@ struct BasicBlock; struct CallInfo; struct CompilationUnit; struct MIR; +struct LIR; struct RegLocation; struct RegisterInfo; class MIRGraph; @@ -107,24 +116,36 @@ typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int, typedef std::vector<uint8_t> CodeBuffer; +struct UseDefMasks { + uint64_t use_mask; // Resource mask for use. + uint64_t def_mask; // Resource mask for def. +}; + +struct AssemblyInfo { + LIR* pcrel_next; // Chain of LIR nodes needing pc relative fixups. + uint8_t bytes[16]; // Encoded instruction bytes. +}; struct LIR { - int offset; // Offset of this instruction. - int dalvik_offset; // Offset of Dalvik opcode. + CodeOffset offset; // Offset of this instruction. + NarrowDexOffset dalvik_offset; // Offset of Dalvik opcode in code units (16-bit words). + int16_t opcode; LIR* next; LIR* prev; LIR* target; - int opcode; - int operands[5]; // [0..4] = [dest, src1, src2, extra, extra2]. struct { - bool is_nop:1; // LIR is optimized away. - bool pcRelFixup:1; // May need pc-relative fixup. - unsigned int size:5; // Note: size is in bytes. - unsigned int unused:25; + unsigned int alias_info:17; // For Dalvik register disambiguation. + bool is_nop:1; // LIR is optimized away. + unsigned int size:4; // Note: size of encoded instruction is in bytes. + bool use_def_invalid:1; // If true, masks should not be used. + unsigned int generation:1; // Used to track visitation state during fixup pass. + unsigned int fixup:8; // Fixup kind. } flags; - int alias_info; // For Dalvik register & litpool disambiguation. - uint64_t use_mask; // Resource mask for use. - uint64_t def_mask; // Resource mask for def. + union { + UseDefMasks m; // Use & Def masks used during optimization. + AssemblyInfo a; // Instruction encoding used during assembly phase. + } u; + int32_t operands[5]; // [0..4] = [dest, src1, src2, extra, extra2]. }; // Target-specific initialization. @@ -141,7 +162,7 @@ Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, // Defines for alias_info (tracks Dalvik register references). #define DECODE_ALIAS_INFO_REG(X) (X & 0xffff) -#define DECODE_ALIAS_INFO_WIDE_FLAG (0x80000000) +#define DECODE_ALIAS_INFO_WIDE_FLAG (0x10000) #define DECODE_ALIAS_INFO_WIDE(X) ((X & DECODE_ALIAS_INFO_WIDE_FLAG) ? 
1 : 0) #define ENCODE_ALIAS_INFO(REG, ISWIDE) (REG | (ISWIDE ? DECODE_ALIAS_INFO_WIDE_FLAG : 0)) @@ -158,36 +179,42 @@ Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, #define ENCODE_ALL (~0ULL) #define ENCODE_MEM (ENCODE_DALVIK_REG | ENCODE_LITERAL | \ ENCODE_HEAP_REF | ENCODE_MUST_NOT_ALIAS) + +// Mask to denote sreg as the start of a double. Must not interfere with low 16 bits. +#define STARTING_DOUBLE_SREG 0x10000 + // TODO: replace these macros #define SLOW_FIELD_PATH (cu_->enable_debug & (1 << kDebugSlowFieldPath)) #define SLOW_INVOKE_PATH (cu_->enable_debug & (1 << kDebugSlowInvokePath)) #define SLOW_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowStringPath)) #define SLOW_TYPE_PATH (cu_->enable_debug & (1 << kDebugSlowTypePath)) #define EXERCISE_SLOWEST_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowestStringPath)) -#define is_pseudo_opcode(opcode) (static_cast<int>(opcode) < 0) class Mir2Lir : public Backend { public: - struct SwitchTable { - int offset; - const uint16_t* table; // Original dex table. - int vaddr; // Dalvik offset of switch opcode. - LIR* anchor; // Reference instruction for relative offsets. - LIR** targets; // Array of case targets. + /* + * Auxiliary information describing the location of data embedded in the Dalvik + * byte code stream. + */ + struct EmbeddedData { + CodeOffset offset; // Code offset of data block. + const uint16_t* table; // Original dex data. + DexOffset vaddr; // Dalvik offset of parent opcode. }; - struct FillArrayData { - int offset; - const uint16_t* table; // Original dex table. - int size; - int vaddr; // Dalvik offset of FILL_ARRAY_DATA opcode. + struct FillArrayData : EmbeddedData { + int32_t size; + }; + + struct SwitchTable : EmbeddedData { + LIR* anchor; // Reference instruction for relative offsets. + LIR** targets; // Array of case targets. }; /* Static register use counts */ struct RefCounts { int count; int s_reg; - bool double_start; // Starting v_reg for a double }; /* @@ -241,6 +268,38 @@ class Mir2Lir : public Backend { return code_buffer_.size() / sizeof(code_buffer_[0]); } + bool IsPseudoLirOp(int opcode) { + return (opcode < 0); + } + + /* + * LIR operands are 32-bit integers. Sometimes, (especially for managing + * instructions which require PC-relative fixups), we need the operands to carry + * pointers. To do this, we assign these pointers an index in pointer_storage_, and + * hold that index in the operand array. + * TUNING: If use of these utilities becomes more common on 32-bit builds, it + * may be worth conditionally-compiling a set of identity functions here. + */ + uint32_t WrapPointer(void* pointer) { + uint32_t res = pointer_storage_.Size(); + pointer_storage_.Insert(pointer); + return res; + } + + void* UnwrapPointer(size_t index) { + return pointer_storage_.Get(index); + } + + // strdup(), but allocates from the arena. 
+ char* ArenaStrdup(const char* str) { + size_t len = strlen(str) + 1; + char* res = reinterpret_cast<char*>(arena_->Alloc(len, ArenaAllocator::kAllocMisc)); + if (res != NULL) { + strncpy(res, str, len); + } + return res; + } + // Shared by all targets - implemented in codegen_util.cc void AppendLIR(LIR* lir); void InsertLIRBefore(LIR* current_lir, LIR* new_lir); @@ -250,16 +309,15 @@ class Mir2Lir : public Backend { virtual void Materialize(); virtual CompiledMethod* GetCompiledMethod(); void MarkSafepointPC(LIR* inst); - bool FastInstance(uint32_t field_idx, int& field_offset, bool& is_volatile, bool is_put); + bool FastInstance(uint32_t field_idx, bool is_put, int* field_offset, bool* is_volatile); void SetupResourceMasks(LIR* lir); - void AssembleLIR(); void SetMemRefType(LIR* lir, bool is_load, int mem_type); void AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit); void SetupRegMask(uint64_t* mask, int reg); void DumpLIRInsn(LIR* arg, unsigned char* base_addr); void DumpPromotionMap(); void CodegenDump(); - LIR* RawLIR(int dalvik_offset, int opcode, int op0 = 0, int op1 = 0, + LIR* RawLIR(DexOffset dalvik_offset, int opcode, int op0 = 0, int op1 = 0, int op2 = 0, int op3 = 0, int op4 = 0, LIR* target = NULL); LIR* NewLIR0(int opcode); LIR* NewLIR1(int opcode, int dest); @@ -274,13 +332,14 @@ class Mir2Lir : public Backend { void ProcessSwitchTables(); void DumpSparseSwitchTable(const uint16_t* table); void DumpPackedSwitchTable(const uint16_t* table); - LIR* MarkBoundary(int offset, const char* inst_str); + void MarkBoundary(DexOffset offset, const char* inst_str); void NopLIR(LIR* lir); + void UnlinkLIR(LIR* lir); bool EvaluateBranch(Instruction::Code opcode, int src1, int src2); bool IsInexpensiveConstant(RegLocation rl_src); ConditionCode FlipComparisonOrder(ConditionCode before); - void DumpMappingTable(const char* table_name, const std::string& descriptor, - const std::string& name, const std::string& signature, + void DumpMappingTable(const char* table_name, const char* descriptor, + const char* name, const Signature& signature, const std::vector<uint32_t>& v); void InstallLiteralPools(); void InstallSwitchTables(); @@ -288,21 +347,18 @@ class Mir2Lir : public Backend { bool VerifyCatchEntries(); void CreateMappingTables(); void CreateNativeGcMap(); - int AssignLiteralOffset(int offset); - int AssignSwitchTablesOffset(int offset); - int AssignFillArrayDataOffset(int offset); - int AssignInsnOffsets(); - void AssignOffsets(); - LIR* InsertCaseLabel(int vaddr, int keyVal); - void MarkPackedCaseLabels(Mir2Lir::SwitchTable *tab_rec); - void MarkSparseCaseLabels(Mir2Lir::SwitchTable *tab_rec); + int AssignLiteralOffset(CodeOffset offset); + int AssignSwitchTablesOffset(CodeOffset offset); + int AssignFillArrayDataOffset(CodeOffset offset); + LIR* InsertCaseLabel(DexOffset vaddr, int keyVal); + void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec); + void MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec); // Shared by all targets - implemented in local_optimizations.cc void ConvertMemOpIntoMove(LIR* orig_lir, int dest, int src); void ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir); void ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir); void ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir); - void RemoveRedundantBranches(); // Shared by all targets - implemented in ralloc_util.cc int GetSRegHi(int lowSreg); @@ -324,11 +380,9 @@ class Mir2Lir : public Backend { void RecordCorePromotion(int reg, int s_reg); int AllocPreservedCoreReg(int s_reg); 
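The WrapPointer()/UnwrapPointer() pair added above exists because LIR operands are 32-bit integers, while the records they occasionally need to reference (switch tables, fill-array-data blocks) are host pointers that may be 64 bits wide. The pointer is stored once in pointer_storage_ and only its index travels through the operand array. A minimal standalone sketch of that round trip follows; the plain std::vector and the SwitchData type are illustrative stand-ins, not the GrowableArray-backed member used in the patch:

#include <cstdint>
#include <vector>

// Stand-in for pointer_storage_: pointers are parked here and only their
// index is carried in a 32-bit LIR operand slot.
static std::vector<void*> pointer_storage;

uint32_t WrapPointer(void* pointer) {
  uint32_t index = static_cast<uint32_t>(pointer_storage.size());
  pointer_storage.push_back(pointer);
  return index;  // Small enough to fit in an int32_t operand.
}

void* UnwrapPointer(uint32_t index) {
  return pointer_storage[index];
}

struct SwitchData { int offset; };  // Hypothetical embedded-data record.

int main() {
  SwitchData tab_rec = {0x40};
  int32_t operand = WrapPointer(&tab_rec);  // At LIR-creation time.
  // ... later, during assembly, the operand is converted back:
  SwitchData* rec = static_cast<SwitchData*>(UnwrapPointer(operand));
  return (rec->offset == 0x40) ? 0 : 1;
}

Later hunks in this change use exactly this pattern: NewLIR5(kX86PcRelLoadRA, ..., WrapPointer(tab_rec)) at code-generation time, and UnwrapPointer() inside EmitPcRel() when the operand is finally encoded.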
void RecordFpPromotion(int reg, int s_reg); - int AllocPreservedSingle(int s_reg, bool even); + int AllocPreservedSingle(int s_reg); int AllocPreservedDouble(int s_reg); - int AllocPreservedFPReg(int s_reg, bool double_start); - int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, - bool required); + int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, bool required); int AllocTempDouble(); int AllocFreeTemp(); int AllocTemp(); @@ -367,13 +421,14 @@ class Mir2Lir : public Backend { RegLocation UpdateRawLoc(RegLocation loc); RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update); RegLocation EvalLoc(RegLocation loc, int reg_class, bool update); - void CountRefs(RefCounts* core_counts, RefCounts* fp_counts); + void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs); void DumpCounts(const RefCounts* arr, int size, const char* msg); void DoPromotion(); int VRegOffset(int v_reg); int SRegOffset(int s_reg); RegLocation GetReturnWide(bool is_double); RegLocation GetReturn(bool is_float); + RegisterInfo* GetRegInfo(int reg); // Shared by all targets - implemented in gen_common.cc. bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div, @@ -407,6 +462,9 @@ class Mir2Lir : public Backend { RegLocation rl_dest, RegLocation rl_obj, bool is_long_or_double, bool is_object); void GenIPut(uint32_t field_idx, int opt_flags, OpSize size, RegLocation rl_src, RegLocation rl_obj, bool is_long_or_double, bool is_object); + void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index, + RegLocation rl_src); + void GenConstClass(uint32_t type_idx, RegLocation rl_dest); void GenConstString(uint32_t string_idx, RegLocation rl_dest); void GenNewInstance(uint32_t type_idx, RegLocation rl_dest); @@ -463,6 +521,10 @@ class Mir2Lir : public Backend { void CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset helper_offset, int arg0, RegLocation arg1, RegLocation arg2, bool safepoint_pc); + void CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset helper_offset, + RegLocation arg0, RegLocation arg1, + RegLocation arg2, + bool safepoint_pc); void GenInvoke(CallInfo* info); void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, @@ -482,6 +544,7 @@ class Mir2Lir : public Backend { bool GenInlinedCharAt(CallInfo* info); bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty); + bool GenInlinedReverseBytes(CallInfo* info, OpSize size); bool GenInlinedAbsInt(CallInfo* info); bool GenInlinedAbsLong(CallInfo* info); bool GenInlinedFloatCvt(CallInfo* info); @@ -550,7 +613,6 @@ class Mir2Lir : public Backend { virtual int AllocTypedTempPair(bool fp_hint, int reg_class) = 0; virtual int S2d(int low_reg, int high_reg) = 0; virtual int TargetReg(SpecialTargetRegister reg) = 0; - virtual RegisterInfo* GetRegInfo(int reg) = 0; virtual RegLocation GetReturnAlt() = 0; virtual RegLocation GetReturnWideAlt() = 0; virtual RegLocation LocCReturn() = 0; @@ -570,9 +632,9 @@ class Mir2Lir : public Backend { virtual void CompilerInitializeRegAlloc() = 0; // Required for target - miscellaneous. 
- virtual AssemblerStatus AssembleInstructions(uintptr_t start_addr) = 0; + virtual void AssembleLIR() = 0; virtual void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix) = 0; - virtual void SetupTargetResourceMasks(LIR* lir) = 0; + virtual void SetupTargetResourceMasks(LIR* lir, uint64_t flags) = 0; virtual const char* GetTargetInstFmt(int opcode) = 0; virtual const char* GetTargetInstName(int opcode) = 0; virtual std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) = 0; @@ -602,6 +664,8 @@ class Mir2Lir : public Backend { virtual bool GenInlinedCas32(CallInfo* info, bool need_write_barrier) = 0; virtual bool GenInlinedMinMaxInt(CallInfo* info, bool is_min) = 0; virtual bool GenInlinedSqrt(CallInfo* info) = 0; + virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0; + virtual bool GenInlinedPoke(CallInfo* info, OpSize size) = 0; virtual void GenNegLong(RegLocation rl_dest, RegLocation rl_src) = 0; virtual void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) = 0; @@ -621,46 +685,40 @@ class Mir2Lir : public Backend { virtual void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) = 0; virtual void GenExitSequence() = 0; - virtual void GenFillArrayData(uint32_t table_offset, + virtual void GenFillArrayData(DexOffset table_offset, RegLocation rl_src) = 0; virtual void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) = 0; virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0; virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0; virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0; - virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src) = 0; - virtual void GenMonitorExit(int opt_flags, RegLocation rl_src) = 0; virtual void GenMoveException(RegLocation rl_dest) = 0; virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, int first_bit, int second_bit) = 0; virtual void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) = 0; virtual void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) = 0; - virtual void GenPackedSwitch(MIR* mir, uint32_t table_offset, + virtual void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0; - virtual void GenSparseSwitch(MIR* mir, uint32_t table_offset, + virtual void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0; virtual void GenSpecialCase(BasicBlock* bb, MIR* mir, SpecialCaseHandler special_case) = 0; - virtual void GenArrayObjPut(int opt_flags, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) = 0; virtual void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale) = 0; virtual void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) = 0; + RegLocation rl_index, RegLocation rl_src, int scale, + bool card_mark) = 0; virtual void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift) = 0; // Required for target - single operation generators. 
virtual LIR* OpUnconditionalBranch(LIR* target) = 0; - virtual LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, - LIR* target) = 0; - virtual LIR* OpCmpImmBranch(ConditionCode cond, int reg, int check_value, - LIR* target) = 0; + virtual LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target) = 0; + virtual LIR* OpCmpImmBranch(ConditionCode cond, int reg, int check_value, LIR* target) = 0; virtual LIR* OpCondBranch(ConditionCode cc, LIR* target) = 0; - virtual LIR* OpDecAndBranch(ConditionCode c_code, int reg, - LIR* target) = 0; + virtual LIR* OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) = 0; virtual LIR* OpFpRegCopy(int r_dest, int r_src) = 0; virtual LIR* OpIT(ConditionCode cond, const char* guide) = 0; virtual LIR* OpMem(OpKind op, int rBase, int disp) = 0; @@ -672,22 +730,23 @@ class Mir2Lir : public Backend { virtual LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset) = 0; virtual LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2) = 0; virtual LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value) = 0; - virtual LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, - int r_src2) = 0; + virtual LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2) = 0; virtual LIR* OpTestSuspend(LIR* target) = 0; virtual LIR* OpThreadMem(OpKind op, ThreadOffset thread_offset) = 0; virtual LIR* OpVldm(int rBase, int count) = 0; virtual LIR* OpVstm(int rBase, int count) = 0; - virtual void OpLea(int rBase, int reg1, int reg2, int scale, - int offset) = 0; - virtual void OpRegCopyWide(int dest_lo, int dest_hi, int src_lo, - int src_hi) = 0; + virtual void OpLea(int rBase, int reg1, int reg2, int scale, int offset) = 0; + virtual void OpRegCopyWide(int dest_lo, int dest_hi, int src_lo, int src_hi) = 0; virtual void OpTlsCmp(ThreadOffset offset, int val) = 0; virtual bool InexpensiveConstantInt(int32_t value) = 0; virtual bool InexpensiveConstantFloat(int32_t value) = 0; virtual bool InexpensiveConstantLong(int64_t value) = 0; virtual bool InexpensiveConstantDouble(int64_t value) = 0; + // May be optimized by targets. + virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src); + virtual void GenMonitorExit(int opt_flags, RegLocation rl_src); + // Temp workaround void Workaround7250540(RegLocation rl_dest, int value); @@ -718,6 +777,7 @@ class Mir2Lir : public Backend { LIR* literal_list_; // Constants. LIR* method_literal_list_; // Method literals requiring patching. LIR* code_literal_list_; // Code literals requiring patching. + LIR* first_fixup_; // Doubly-linked list of LIR nodes requiring fixups. protected: CompilationUnit* const cu_; @@ -727,7 +787,9 @@ class Mir2Lir : public Backend { GrowableArray<LIR*> throw_launchpads_; GrowableArray<LIR*> suspend_launchpads_; GrowableArray<LIR*> intrinsic_launchpads_; - SafeMap<unsigned int, LIR*> boundary_map_; // boundary lookup cache. + GrowableArray<RegisterInfo*> tempreg_info_; + GrowableArray<RegisterInfo*> reginfo_map_; + GrowableArray<void*> pointer_storage_; /* * Holds mapping from native PC to dex PC for safepoints where we may deoptimize. * Native PC is on the return address of the safepointed operation. Dex PC is for @@ -739,8 +801,9 @@ class Mir2Lir : public Backend { * immediately preceed the instruction. */ std::vector<uint32_t> dex2pc_mapping_table_; - int data_offset_; // starting offset of literal pool. - int total_size_; // header + code size. + CodeOffset current_code_offset_; // Working byte offset of machine instructons. 
+ CodeOffset data_offset_; // starting offset of literal pool. + size_t total_size_; // header + code size. LIR* block_label_list_; PromotionMap* promotion_map_; /* @@ -752,7 +815,8 @@ class Mir2Lir : public Backend { * in the CompilationUnit struct before codegen for each instruction. * The low-level LIR creation utilites will pull it from here. Rework this. */ - int current_dalvik_offset_; + DexOffset current_dalvik_offset_; + size_t estimated_native_code_size_; // Just an estimate; used to reserve code_buffer_ size. RegisterPool* reg_pool_; /* * Sanity checking for the register temp tracking. The same ssa diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 71b74a4a68..41a57afca1 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -28,13 +28,9 @@ namespace art { * live until it is either explicitly killed or reallocated. */ void Mir2Lir::ResetRegPool() { - for (int i = 0; i < reg_pool_->num_core_regs; i++) { - if (reg_pool_->core_regs[i].is_temp) - reg_pool_->core_regs[i].in_use = false; - } - for (int i = 0; i < reg_pool_->num_fp_regs; i++) { - if (reg_pool_->FPRegs[i].is_temp) - reg_pool_->FPRegs[i].in_use = false; + GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_); + for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) { + info->in_use = false; } // Reset temp tracking sanity check. if (kIsDebugBuild) { @@ -48,13 +44,21 @@ void Mir2Lir::ResetRegPool() { */ void Mir2Lir::CompilerInitPool(RegisterInfo* regs, int* reg_nums, int num) { for (int i = 0; i < num; i++) { - regs[i].reg = reg_nums[i]; + uint32_t reg_number = reg_nums[i]; + regs[i].reg = reg_number; regs[i].in_use = false; regs[i].is_temp = false; regs[i].pair = false; regs[i].live = false; regs[i].dirty = false; regs[i].s_reg = INVALID_SREG; + size_t map_size = reginfo_map_.Size(); + if (reg_number >= map_size) { + for (uint32_t i = 0; i < ((reg_number - map_size) + 1); i++) { + reginfo_map_.Insert(NULL); + } + } + reginfo_map_.Put(reg_number, &regs[i]); } } @@ -62,10 +66,9 @@ void Mir2Lir::DumpRegPool(RegisterInfo* p, int num_regs) { LOG(INFO) << "================================================"; for (int i = 0; i < num_regs; i++) { LOG(INFO) << StringPrintf( - "R[%d]: T:%d, U:%d, P:%d, p:%d, LV:%d, D:%d, SR:%d, ST:%x, EN:%x", + "R[%d]: T:%d, U:%d, P:%d, p:%d, LV:%d, D:%d, SR:%d", p[i].reg, p[i].is_temp, p[i].in_use, p[i].pair, p[i].partner, - p[i].live, p[i].dirty, p[i].s_reg, reinterpret_cast<uintptr_t>(p[i].def_start), - reinterpret_cast<uintptr_t>(p[i].def_end)); + p[i].live, p[i].dirty, p[i].s_reg); } LOG(INFO) << "================================================"; } @@ -170,17 +173,12 @@ void Mir2Lir::RecordFpPromotion(int reg, int s_reg) { promotion_map_[p_map_idx].FpReg = reg; } -/* - * Reserve a callee-save fp single register. Try to fullfill request for - * even/odd allocation, but go ahead and allocate anything if not - * available. If nothing's available, return -1. - */ -int Mir2Lir::AllocPreservedSingle(int s_reg, bool even) { - int res = -1; +// Reserve a callee-save fp single register. +int Mir2Lir::AllocPreservedSingle(int s_reg) { + int res = -1; // Return code if none available.
RegisterInfo* FPRegs = reg_pool_->FPRegs; for (int i = 0; i < reg_pool_->num_fp_regs; i++) { - if (!FPRegs[i].is_temp && !FPRegs[i].in_use && - ((FPRegs[i].reg & 0x1) == 0) == even) { + if (!FPRegs[i].is_temp && !FPRegs[i].in_use) { res = FPRegs[i].reg; RecordFpPromotion(res, s_reg); break; @@ -246,26 +244,6 @@ int Mir2Lir::AllocPreservedDouble(int s_reg) { return res; } - -/* - * Reserve a callee-save fp register. If this register can be used - * as the first of a double, attempt to allocate an even pair of fp - * single regs (but if can't still attempt to allocate a single, preferring - * first to allocate an odd register. - */ -int Mir2Lir::AllocPreservedFPReg(int s_reg, bool double_start) { - int res = -1; - if (double_start) { - res = AllocPreservedDouble(s_reg); - } - if (res == -1) { - res = AllocPreservedSingle(s_reg, false /* try odd # */); - } - if (res == -1) - res = AllocPreservedSingle(s_reg, true /* try even # */); - return res; -} - int Mir2Lir::AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, bool required) { int next = *next_temp; @@ -379,7 +357,7 @@ Mir2Lir::RegisterInfo* Mir2Lir::AllocLiveBody(RegisterInfo* p, int num_regs, int if (s_reg == -1) return NULL; for (int i = 0; i < num_regs; i++) { - if (p[i].live && (p[i].s_reg == s_reg)) { + if ((p[i].s_reg == s_reg) && p[i].live) { if (p[i].is_temp) p[i].in_use = true; return &p[i]; @@ -412,47 +390,16 @@ Mir2Lir::RegisterInfo* Mir2Lir::AllocLive(int s_reg, int reg_class) { } void Mir2Lir::FreeTemp(int reg) { - RegisterInfo* p = reg_pool_->core_regs; - int num_regs = reg_pool_->num_core_regs; - for (int i = 0; i< num_regs; i++) { - if (p[i].reg == reg) { - if (p[i].is_temp) { - p[i].in_use = false; - } - p[i].pair = false; - return; - } - } - p = reg_pool_->FPRegs; - num_regs = reg_pool_->num_fp_regs; - for (int i = 0; i< num_regs; i++) { - if (p[i].reg == reg) { - if (p[i].is_temp) { - p[i].in_use = false; - } - p[i].pair = false; - return; - } + RegisterInfo* p = GetRegInfo(reg); + if (p->is_temp) { + p->in_use = false; } - LOG(FATAL) << "Tried to free a non-existant temp: r" << reg; + p->pair = false; } Mir2Lir::RegisterInfo* Mir2Lir::IsLive(int reg) { - RegisterInfo* p = reg_pool_->core_regs; - int num_regs = reg_pool_->num_core_regs; - for (int i = 0; i< num_regs; i++) { - if (p[i].reg == reg) { - return p[i].live ? &p[i] : NULL; - } - } - p = reg_pool_->FPRegs; - num_regs = reg_pool_->num_fp_regs; - for (int i = 0; i< num_regs; i++) { - if (p[i].reg == reg) { - return p[i].live ? &p[i] : NULL; - } - } - return NULL; + RegisterInfo* p = GetRegInfo(reg); + return p->live ? p : NULL; } Mir2Lir::RegisterInfo* Mir2Lir::IsTemp(int reg) { @@ -476,27 +423,10 @@ bool Mir2Lir::IsDirty(int reg) { * allocated. Use with caution. 
*/ void Mir2Lir::LockTemp(int reg) { - RegisterInfo* p = reg_pool_->core_regs; - int num_regs = reg_pool_->num_core_regs; - for (int i = 0; i< num_regs; i++) { - if (p[i].reg == reg) { - DCHECK(p[i].is_temp); - p[i].in_use = true; - p[i].live = false; - return; - } - } - p = reg_pool_->FPRegs; - num_regs = reg_pool_->num_fp_regs; - for (int i = 0; i< num_regs; i++) { - if (p[i].reg == reg) { - DCHECK(p[i].is_temp); - p[i].in_use = true; - p[i].live = false; - return; - } - } - LOG(FATAL) << "Tried to lock a non-existant temp: r" << reg; + RegisterInfo* p = GetRegInfo(reg); + DCHECK(p->is_temp); + p->in_use = true; + p->live = false; } void Mir2Lir::ResetDef(int reg) { @@ -599,11 +529,13 @@ void Mir2Lir::ResetDefTracking() { } void Mir2Lir::ClobberAllRegs() { - for (int i = 0; i< reg_pool_->num_core_regs; i++) { - ClobberBody(&reg_pool_->core_regs[i]); - } - for (int i = 0; i< reg_pool_->num_fp_regs; i++) { - ClobberBody(&reg_pool_->FPRegs[i]); + GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_); + for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) { + info->live = false; + info->s_reg = INVALID_SREG; + info->def_start = NULL; + info->def_end = NULL; + info->pair = false; } } @@ -659,11 +591,13 @@ void Mir2Lir::MarkLive(int reg, int s_reg) { void Mir2Lir::MarkTemp(int reg) { RegisterInfo* info = GetRegInfo(reg); + tempreg_info_.Insert(info); info->is_temp = true; } void Mir2Lir::UnmarkTemp(int reg) { RegisterInfo* info = GetRegInfo(reg); + tempreg_info_.Delete(info); info->is_temp = false; } @@ -834,9 +768,9 @@ RegLocation Mir2Lir::UpdateRawLoc(RegLocation loc) { RegLocation Mir2Lir::EvalLocWide(RegLocation loc, int reg_class, bool update) { DCHECK(loc.wide); - int new_regs; - int low_reg; - int high_reg; + int32_t new_regs; + int32_t low_reg; + int32_t high_reg; loc = UpdateLocWide(loc); @@ -912,18 +846,22 @@ RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) { } /* USE SSA names to count references of base Dalvik v_regs. */ -void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts) { +void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) { for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) { RegLocation loc = mir_graph_->reg_location_[i]; RefCounts* counts = loc.fp ? fp_counts : core_counts; int p_map_idx = SRegToPMap(loc.s_reg_low); - // Don't count easily regenerated immediates - if (loc.fp || !IsInexpensiveConstant(loc)) { + if (loc.fp) { + if (loc.wide) { + // Treat doubles as a unit, using upper half of fp_counts array. + counts[p_map_idx + num_regs].count += mir_graph_->GetUseCount(i); + i++; + } else { + counts[p_map_idx].count += mir_graph_->GetUseCount(i); + } + } else if (!IsInexpensiveConstant(loc)) { counts[p_map_idx].count += mir_graph_->GetUseCount(i); } - if (loc.wide && loc.fp && !loc.high_word) { - counts[p_map_idx].double_start = true; - } } } @@ -942,7 +880,11 @@ static int SortCounts(const void *val1, const void *val2) { void Mir2Lir::DumpCounts(const RefCounts* arr, int size, const char* msg) { LOG(INFO) << msg; for (int i = 0; i < size; i++) { - LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count; + if ((arr[i].s_reg & STARTING_DOUBLE_SREG) != 0) { + LOG(INFO) << "s_reg[D" << (arr[i].s_reg & ~STARTING_DOUBLE_SREG) << "]: " << arr[i].count; + } else { + LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count; + } } } @@ -965,7 +907,7 @@ void Mir2Lir::DoPromotion() { * count based on original Dalvik register name.
Count refs * separately based on type in order to give allocation * preference to fp doubles - which must be allocated sequential - * physical single fp registers started with an even-numbered + * physical single fp registers starting with an even-numbered * reg. * TUNING: replace with linear scan once we have the ability * to describe register live ranges for GC. @@ -974,7 +916,7 @@ void Mir2Lir::DoPromotion() { static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * num_regs, ArenaAllocator::kAllocRegAlloc)); RefCounts *FpRegs = - static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs, + static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs * 2, ArenaAllocator::kAllocRegAlloc)); // Set ssa names for original Dalvik registers for (int i = 0; i < dalvik_regs; i++) { @@ -982,46 +924,49 @@ void Mir2Lir::DoPromotion() { } // Set ssa name for Method* core_regs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg(); - FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg(); // For consistecy + FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg(); // For consistecy. + FpRegs[dalvik_regs + num_regs].s_reg = mir_graph_->GetMethodSReg(); // for consistency. // Set ssa names for compiler_temps for (int i = 1; i <= cu_->num_compiler_temps; i++) { CompilerTemp* ct = mir_graph_->compiler_temps_.Get(i); core_regs[dalvik_regs + i].s_reg = ct->s_reg; FpRegs[dalvik_regs + i].s_reg = ct->s_reg; + FpRegs[num_regs + dalvik_regs + i].s_reg = ct->s_reg; } - // Sum use counts of SSA regs by original Dalvik vreg. - CountRefs(core_regs, FpRegs); - - /* - * Ideally, we'd allocate doubles starting with an even-numbered - * register. Bias the counts to try to allocate any vreg that's - * used as the start of a pair first. - */ + // Duplicate in upper half to represent possible fp double starting sregs. for (int i = 0; i < num_regs; i++) { - if (FpRegs[i].double_start) { - FpRegs[i].count *= 2; - } + FpRegs[num_regs + i].s_reg = FpRegs[i].s_reg | STARTING_DOUBLE_SREG; } + // Sum use counts of SSA regs by original Dalvik vreg. + CountRefs(core_regs, FpRegs, num_regs); + + // Sort the count arrays qsort(core_regs, num_regs, sizeof(RefCounts), SortCounts); - qsort(FpRegs, num_regs, sizeof(RefCounts), SortCounts); + qsort(FpRegs, num_regs * 2, sizeof(RefCounts), SortCounts); if (cu_->verbose) { DumpCounts(core_regs, num_regs, "Core regs after sort"); - DumpCounts(FpRegs, num_regs, "Fp regs after sort"); + DumpCounts(FpRegs, num_regs * 2, "Fp regs after sort"); } if (!(cu_->disable_opt & (1 << kPromoteRegs))) { // Promote FpRegs - for (int i = 0; (i < num_regs) && (FpRegs[i].count >= promotion_threshold); i++) { - int p_map_idx = SRegToPMap(FpRegs[i].s_reg); - if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) { - int reg = AllocPreservedFPReg(FpRegs[i].s_reg, - FpRegs[i].double_start); + for (int i = 0; (i < (num_regs * 2)) && (FpRegs[i].count >= promotion_threshold); i++) { + int p_map_idx = SRegToPMap(FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG); + if ((FpRegs[i].s_reg & STARTING_DOUBLE_SREG) != 0) { + if ((promotion_map_[p_map_idx].fp_location != kLocPhysReg) && + (promotion_map_[p_map_idx + 1].fp_location != kLocPhysReg)) { + int low_sreg = FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG; + // Ignore result - if can't alloc double may still be able to alloc singles. + AllocPreservedDouble(low_sreg); + } + } else if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) { + int reg = AllocPreservedSingle(FpRegs[i].s_reg); if (reg < 0) { - break; // No more left + break; // No more left. 
} } } diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index e8834320a9..2047f30765 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -246,6 +246,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"), #undef UNARY_ENCODING_MAP + { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0, { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0 }, "Bswap32R", "!0r" }, + #define EXT_0F_ENCODING_MAP(opname, prefix, opcode, reg_def) \ { kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \ { kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \ @@ -362,6 +364,7 @@ static size_t ComputeSize(const X86EncodingMap* entry, int base, int displacemen } int X86Mir2Lir::GetInsnSize(LIR* lir) { + DCHECK(!IsPseudoLirOp(lir->opcode)); const X86EncodingMap* entry = &X86Mir2Lir::EncodingMap[lir->opcode]; switch (entry->kind) { case kData: @@ -370,6 +373,8 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { return lir->operands[0]; // length of nop is sole operand case kNullary: return 1; // 1 byte of opcode + case kRegOpcode: // lir operands - 0: reg + return ComputeSize(entry, 0, 0, false) - 1; // substract 1 for modrm case kReg: // lir operands - 0: reg return ComputeSize(entry, 0, 0, false); case kMem: // lir operands - 0: base, 1: disp @@ -513,6 +518,33 @@ void X86Mir2Lir::EmitDisp(int base, int disp) { } } +void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) { + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + code_buffer_.push_back(entry->skeleton.opcode); + if (entry->skeleton.opcode == 0x0F) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + // There's no 3-byte instruction with +rd + DCHECK_NE(0x38, entry->skeleton.extra_opcode1); + DCHECK_NE(0x3A, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + DCHECK(!X86_FPREG(reg)); + DCHECK_LT(reg, 8); + code_buffer_.back() += reg; + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); +} + void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) { if (entry->skeleton.prefix1 != 0) { code_buffer_.push_back(entry->skeleton.prefix1); @@ -525,7 +557,7 @@ void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) { code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -582,7 +614,7 @@ void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, code_buffer_.push_back(entry->skeleton.opcode); if 
(entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -595,7 +627,9 @@ void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, reg = reg & X86_FP_REG_MASK; } if (reg >= 4) { - DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(reg) + DCHECK(strchr(entry->name, '8') == NULL || + entry->opcode == kX86Movzx8RM || entry->opcode == kX86Movsx8RM) + << entry->name << " " << static_cast<int>(reg) << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file); } DCHECK_LT(reg, 8); @@ -631,7 +665,7 @@ void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -672,7 +706,7 @@ void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int dis code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -712,7 +746,7 @@ void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t r code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -749,7 +783,7 @@ void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -808,7 +842,7 @@ void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -858,7 +892,7 @@ void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int disp, int imm) { 
code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -923,7 +957,7 @@ void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int i } if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -1037,7 +1071,7 @@ void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -1066,7 +1100,7 @@ void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int disp) { code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -1089,11 +1123,13 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, int base_or_table, uint8_t index, int scale, int table_or_disp) { int disp; if (entry->opcode == kX86PcRelLoadRA) { - Mir2Lir::SwitchTable *tab_rec = reinterpret_cast<Mir2Lir::SwitchTable*>(table_or_disp); + Mir2Lir::EmbeddedData *tab_rec = + reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(table_or_disp)); disp = tab_rec->offset; } else { DCHECK(entry->opcode == kX86PcRelAdr); - Mir2Lir::FillArrayData *tab_rec = reinterpret_cast<Mir2Lir::FillArrayData*>(base_or_table); + Mir2Lir::EmbeddedData *tab_rec = + reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(base_or_table)); disp = tab_rec->offset; } if (entry->skeleton.prefix1 != 0) { @@ -1160,13 +1196,13 @@ void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) { * instruction. In those cases we will try to substitute a new code * sequence or request that the trace be shortened and retried. 
*/ -AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { +AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { LIR *lir; AssemblerStatus res = kSuccess; // Assume success const bool kVerbosePcFixup = false; for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { - if (lir->opcode < 0) { + if (IsPseudoLirOp(lir->opcode)) { continue; } @@ -1174,19 +1210,19 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { continue; } - if (lir->flags.pcRelFixup) { + if (lir->flags.fixup != kFixupNone) { switch (lir->opcode) { case kX86Jcc8: { LIR *target_lir = lir->target; DCHECK(target_lir != NULL); int delta = 0; - uintptr_t pc; + CodeOffset pc; if (IS_SIMM8(lir->operands[0])) { pc = lir->offset + 2 /* opcode + rel8 */; } else { pc = lir->offset + 6 /* 2 byte opcode + rel32 */; } - uintptr_t target = target_lir->offset; + CodeOffset target = target_lir->offset; delta = target - pc; if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) { if (kVerbosePcFixup) { @@ -1210,8 +1246,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { case kX86Jcc32: { LIR *target_lir = lir->target; DCHECK(target_lir != NULL); - uintptr_t pc = lir->offset + 6 /* 2 byte opcode + rel32 */; - uintptr_t target = target_lir->offset; + CodeOffset pc = lir->offset + 6 /* 2 byte opcode + rel32 */; + CodeOffset target = target_lir->offset; int delta = target - pc; if (kVerbosePcFixup) { LOG(INFO) << "Source:"; @@ -1227,17 +1263,17 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { LIR *target_lir = lir->target; DCHECK(target_lir != NULL); int delta = 0; - uintptr_t pc; + CodeOffset pc; if (IS_SIMM8(lir->operands[0])) { pc = lir->offset + 2 /* opcode + rel8 */; } else { pc = lir->offset + 5 /* opcode + rel32 */; } - uintptr_t target = target_lir->offset; + CodeOffset target = target_lir->offset; delta = target - pc; if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && delta == 0) { // Useless branch - lir->flags.is_nop = true; + NopLIR(lir); if (kVerbosePcFixup) { LOG(INFO) << "Retry for useless branch at " << lir->offset; } @@ -1256,8 +1292,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { case kX86Jmp32: { LIR *target_lir = lir->target; DCHECK(target_lir != NULL); - uintptr_t pc = lir->offset + 5 /* opcode + rel32 */; - uintptr_t target = target_lir->offset; + CodeOffset pc = lir->offset + 5 /* opcode + rel32 */; + CodeOffset target = target_lir->offset; int delta = target - pc; lir->operands[0] = delta; break; @@ -1298,6 +1334,9 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); break; + case kRegOpcode: // lir operands - 0: reg + EmitOpRegOpcode(entry, lir->operands[0]); + break; case kReg: // lir operands - 0: reg EmitOpReg(entry, lir->operands[0]); break; @@ -1385,4 +1424,101 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { return res; } +// LIR offset assignment. +// TODO: consolidate w/ Arm assembly mechanism. 
+int X86Mir2Lir::AssignInsnOffsets() { + LIR* lir; + int offset = 0; + + for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { + lir->offset = offset; + if (LIKELY(!IsPseudoLirOp(lir->opcode))) { + if (!lir->flags.is_nop) { + offset += lir->flags.size; + } + } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) { + if (offset & 0x2) { + offset += 2; + lir->operands[0] = 1; + } else { + lir->operands[0] = 0; + } + } + /* Pseudo opcodes don't consume space */ + } + return offset; +} + +/* + * Walk the compilation unit and assign offsets to instructions + * and literals and compute the total size of the compiled unit. + * TODO: consolidate w/ Arm assembly mechanism. + */ +void X86Mir2Lir::AssignOffsets() { + int offset = AssignInsnOffsets(); + + /* Const values have to be word aligned */ + offset = (offset + 3) & ~3; + + /* Set up offsets for literals */ + data_offset_ = offset; + + offset = AssignLiteralOffset(offset); + + offset = AssignSwitchTablesOffset(offset); + + offset = AssignFillArrayDataOffset(offset); + + total_size_ = offset; +} + +/* + * Go over each instruction in the list and calculate the offset from the top + * before sending them off to the assembler. If out-of-range branch distance is + * seen rearrange the instructions a bit to correct it. + * TODO: consolidate w/ Arm assembly mechanism. + */ +void X86Mir2Lir::AssembleLIR() { + cu_->NewTimingSplit("Assemble"); + AssignOffsets(); + int assembler_retries = 0; + /* + * Assemble here. Note that we generate code with optimistic assumptions + * and if found now to work, we'll have to redo the sequence and retry. + */ + + while (true) { + AssemblerStatus res = AssembleInstructions(0); + if (res == kSuccess) { + break; + } else { + assembler_retries++; + if (assembler_retries > MAX_ASSEMBLER_RETRIES) { + CodegenDump(); + LOG(FATAL) << "Assembler error - too many retries"; + } + // Redo offsets and try again + AssignOffsets(); + code_buffer_.clear(); + } + } + + cu_->NewTimingSplit("LiteralData"); + // Install literals + InstallLiteralPools(); + + // Install switch tables + InstallSwitchTables(); + + // Install fill array data + InstallFillArrayData(); + + // Create the mapping table and native offset to reference map. + cu_->NewTimingSplit("PcMappingTable"); + CreateMappingTables(); + + cu_->NewTimingSplit("GcMap"); + CreateNativeGcMap(); +} + } // namespace art diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index 2be2aa9a0e..17924b0f08 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -31,15 +31,15 @@ void X86Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, * The sparse table in the literal pool is an array of <key,displacement> * pairs. 
*/ -void X86Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, +void X86Mir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpSparseSwitchTable(table); } int entries = table[1]; - const int* keys = reinterpret_cast<const int*>(&table[2]); - const int* targets = &keys[entries]; + const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]); + const int32_t* targets = &keys[entries]; rl_src = LoadValue(rl_src, kCoreReg); for (int i = 0; i < entries; i++) { int key = keys[i]; @@ -66,15 +66,15 @@ void X86Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, * jmp r_start_of_method * done: */ -void X86Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, +void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpPackedSwitchTable(table); } // Add the table to the list - we'll process it later - SwitchTable *tab_rec = - static_cast<SwitchTable *>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); + SwitchTable* tab_rec = + static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; int size = table[1]; @@ -103,8 +103,7 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, // Load the displacement from the switch table int disp_reg = AllocTemp(); - NewLIR5(kX86PcRelLoadRA, disp_reg, start_of_method_reg, keyReg, 2, - reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR5(kX86PcRelLoadRA, disp_reg, start_of_method_reg, keyReg, 2, WrapPointer(tab_rec)); // Add displacement to start of method OpRegReg(kOpAdd, start_of_method_reg, disp_reg); // ..and go! @@ -126,10 +125,10 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, * * Total size is 4+(width * size + 1)/2 16-bit code units. */ -void X86Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { +void X86Mir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; // Add the table to the list - we'll process it later - FillArrayData *tab_rec = + FillArrayData* tab_rec = static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), ArenaAllocator::kAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; @@ -144,49 +143,12 @@ void X86Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { LoadValueDirectFixed(rl_src, rX86_ARG0); // Materialize a pointer to the fill data image NewLIR1(kX86StartOfMethod, rX86_ARG2); - NewLIR2(kX86PcRelAdr, rX86_ARG1, reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR2(kX86PcRelAdr, rX86_ARG1, WrapPointer(tab_rec)); NewLIR2(kX86Add32RR, rX86_ARG1, rX86_ARG2); CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pHandleFillArrayData), rX86_ARG0, rX86_ARG1, true); } -void X86Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { - FlushAllRegs(); - LoadValueDirectFixed(rl_src, rCX); // Get obj - LockCallTemps(); // Prepare for explicit register usage - GenNullCheck(rl_src.s_reg_low, rCX, opt_flags); - // If lock is unheld, try to grab it quickly with compare and exchange - // TODO: copy and clear hash state? 
- NewLIR2(kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value()); - NewLIR2(kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT); - NewLIR2(kX86Xor32RR, rAX, rAX); - NewLIR3(kX86LockCmpxchgMR, rCX, mirror::Object::MonitorOffset().Int32Value(), rDX); - LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); - // If lock is held, go the expensive route - artLockObjectFromCode(self, obj); - CallRuntimeHelperReg(QUICK_ENTRYPOINT_OFFSET(pLockObject), rCX, true); - branch->target = NewLIR0(kPseudoTargetLabel); -} - -void X86Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { - FlushAllRegs(); - LoadValueDirectFixed(rl_src, rAX); // Get obj - LockCallTemps(); // Prepare for explicit register usage - GenNullCheck(rl_src.s_reg_low, rAX, opt_flags); - // If lock is held by the current thread, clear it to quickly release it - // TODO: clear hash state? - NewLIR2(kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value()); - NewLIR2(kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT); - NewLIR3(kX86Mov32RM, rCX, rAX, mirror::Object::MonitorOffset().Int32Value()); - OpRegReg(kOpSub, rCX, rDX); - LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); - NewLIR3(kX86Mov32MR, rAX, mirror::Object::MonitorOffset().Int32Value(), rCX); - LIR* branch2 = NewLIR1(kX86Jmp8, 0); - branch->target = NewLIR0(kPseudoTargetLabel); - // Otherwise, go the expensive route - UnlockObjectFromCode(obj); - CallRuntimeHelperReg(QUICK_ENTRYPOINT_OFFSET(pUnlockObject), rAX, true); - branch2->target = NewLIR0(kPseudoTargetLabel); -} - void X86Mir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Thread::ExceptionOffset().Int32Value(); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 478654d0b4..1d6509eea5 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -52,7 +52,6 @@ class X86Mir2Lir : public Mir2Lir { int AllocTypedTempPair(bool fp_hint, int reg_class); int S2d(int low_reg, int high_reg); int TargetReg(SpecialTargetRegister reg); - RegisterInfo* GetRegInfo(int reg); RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); @@ -72,9 +71,12 @@ class X86Mir2Lir : public Mir2Lir { void CompilerInitializeRegAlloc(); // Required for target - miscellaneous. - AssemblerStatus AssembleInstructions(uintptr_t start_addr); + void AssembleLIR(); + int AssignInsnOffsets(); + void AssignOffsets(); + AssemblerStatus AssembleInstructions(CodeOffset start_addr); void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir); + void SetupTargetResourceMasks(LIR* lir, uint64_t flags); const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); @@ -86,14 +88,12 @@ class X86Mir2Lir : public Mir2Lir { // Required for target - Dalvik-level generators. 
void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); - void GenArrayObjPut(int opt_flags, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale); void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale); void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale); + RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark); void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_shift); + RegLocation rl_src1, RegLocation rl_shift); void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -107,6 +107,8 @@ class X86Mir2Lir : public Mir2Lir { bool GenInlinedCas32(CallInfo* info, bool need_write_barrier); bool GenInlinedMinMaxInt(CallInfo* info, bool is_min); bool GenInlinedSqrt(CallInfo* info); + bool GenInlinedPeek(CallInfo* info, OpSize size); + bool GenInlinedPoke(CallInfo* info, OpSize size); void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -119,20 +121,18 @@ class X86Mir2Lir : public Mir2Lir { void GenDivZeroCheck(int reg_lo, int reg_hi); void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); void GenExitSequence(); - void GenFillArrayData(uint32_t table_offset, RegLocation rl_src); + void GenFillArrayData(DexOffset table_offset, RegLocation rl_src); void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); void GenSelect(BasicBlock* bb, MIR* mir); void GenMemBarrier(MemBarrierKind barrier_kind); - void GenMonitorEnter(int opt_flags, RegLocation rl_src); - void GenMonitorExit(int opt_flags, RegLocation rl_src); void GenMoveException(RegLocation rl_dest); void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, int first_bit, int second_bit); void GenNegDouble(RegLocation rl_dest, RegLocation rl_src); void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); - void GenPackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); - void GenSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); + void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); void GenSpecialCase(BasicBlock* bb, MIR* mir, SpecialCaseHandler special_case); // Single operation generators. 
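The DexOffset and CodeOffset typedefs now threaded through these declarations (GenFillArrayData, GenPackedSwitch, GenSparseSwitch, AssembleInstructions) mark a unit distinction rather than a representation change: Dex offsets count 16-bit code units, native code offsets count bytes. A small worked sketch of why the switch handlers can simply add a DexOffset to the insns pointer; the values here are made up for illustration:

#include <cassert>
#include <cstdint>

typedef uint32_t DexOffset;   // Dex offsets are in 16-bit code units.
typedef uint32_t CodeOffset;  // Native code offsets are in bytes.

int main() {
  // A fake method body of eight 16-bit Dalvik code units.
  uint16_t insns[8] = {0, 1, 2, 3, 4, 5, 6, 7};

  DexOffset current_dalvik_offset = 2;  // Offset of the current opcode.
  DexOffset table_offset = 3;           // Payload offset encoded in the opcode.

  // Pointer arithmetic on uint16_t* advances one code unit per step, so the
  // sum of two DexOffsets indexes the payload directly. This mirrors the
  // cu_->insns + current_dalvik_offset_ + table_offset expression used above.
  const uint16_t* table = insns + current_dalvik_offset + table_offset;
  assert(*table == 5);

  // Measured in bytes the same distance is twice as large; keeping DexOffset
  // and CodeOffset (a byte count) as distinct names makes the intended unit
  // visible in signatures even though both are plain uint32_t.
  uint32_t byte_distance = (current_dalvik_offset + table_offset) * sizeof(uint16_t);
  assert(byte_distance == 10);
  return 0;
}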
@@ -172,6 +172,7 @@ class X86Mir2Lir : public Mir2Lir { private: void EmitDisp(int base, int disp); + void EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg); void EmitOpReg(const X86EncodingMap* entry, uint8_t reg); void EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp); void EmitMemReg(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg); diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index f736b5e28f..c9d6bfc8cc 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -284,8 +284,8 @@ void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest, void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) { - LIR* taken = &block_label_list_[bb->taken->id]; - LIR* not_taken = &block_label_list_[bb->fall_through->id]; + LIR* taken = &block_label_list_[bb->taken]; + LIR* not_taken = &block_label_list_[bb->fall_through]; LIR* branch = NULL; RegLocation rl_src1; RegLocation rl_src2; diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 14be7dde90..499547bb37 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -166,7 +166,7 @@ void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { } void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { - LIR* taken = &block_label_list_[bb->taken->id]; + LIR* taken = &block_label_list_[bb->taken]; RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); FlushAllRegs(); @@ -236,6 +236,43 @@ bool X86Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { return true; } +bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address.wide = 0; // ignore high half in info->args[1] + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (size == kLong) { + // Unaligned access is allowed on x86. + LoadBaseDispWide(rl_address.low_reg, 0, rl_result.low_reg, rl_result.high_reg, INVALID_SREG); + StoreValueWide(rl_dest, rl_result); + } else { + DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); + // Unaligned access is allowed on x86. + LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, size, INVALID_SREG); + StoreValue(rl_dest, rl_result); + } + return true; +} + +bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address.wide = 0; // ignore high half in info->args[1] + RegLocation rl_src_value = info->args[2]; // [size] value + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + if (size == kLong) { + // Unaligned access is allowed on x86. + RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg); + StoreBaseDispWide(rl_address.low_reg, 0, rl_value.low_reg, rl_value.high_reg); + } else { + DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); + // Unaligned access is allowed on x86. 
@@ -419,7 +456,7 @@ void X86Mir2Lir::OpRegThreadMem(OpKind op, int r_dest, ThreadOffset thread_offse
  * Generate array load
  */
 void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_dest, int scale) {
+                             RegLocation rl_index, RegLocation rl_dest, int scale) {
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -466,7 +503,7 @@ void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
  *
  */
 void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_src, int scale) {
+                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -502,59 +539,10 @@ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
     StoreBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_src.low_reg,
                          rl_src.high_reg, size, INVALID_SREG);
   }
-}
-
-/*
- * Generate array store
- *
- */
-void X86Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale) {
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value();
-
-  FlushAllRegs();  // Use explicit registers
-  LockCallTemps();
-
-  int r_value = TargetReg(kArg0);  // Register holding value
-  int r_array_class = TargetReg(kArg1);  // Register holding array's Class
-  int r_array = TargetReg(kArg2);  // Register holding array
-  int r_index = TargetReg(kArg3);  // Register holding index into array
-
-  LoadValueDirectFixed(rl_array, r_array);  // Grab array
-  LoadValueDirectFixed(rl_src, r_value);  // Grab value
-  LoadValueDirectFixed(rl_index, r_index);  // Grab index
-
-  GenNullCheck(rl_array.s_reg_low, r_array, opt_flags);  // NPE?
-
-  // Store of null?
-  LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL);
-
-  // Get the array's class.
-  LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class);
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCanPutArrayElement), r_value,
-                          r_array_class, true);
-  // Redo LoadValues in case they didn't survive the call.
-  LoadValueDirectFixed(rl_array, r_array);  // Reload array
-  LoadValueDirectFixed(rl_index, r_index);  // Reload index
-  LoadValueDirectFixed(rl_src, r_value);  // Reload value
-  r_array_class = INVALID_REG;
-
-  // Branch here if value to be stored == null
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  null_value_check->target = target;
-
-  // make an extra temp available for card mark below
-  FreeTemp(TargetReg(kArg1));
-  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
-    /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */
-    GenRegMemCheck(kCondUge, r_index, r_array, len_offset, kThrowArrayBounds);
-  }
-  StoreBaseIndexedDisp(r_array, r_index, scale,
-                       data_offset, r_value, INVALID_REG, kWord, INVALID_SREG);
-  FreeTemp(r_index);
-  if (!mir_graph_->IsConstantNullRef(rl_src)) {
-    MarkGCCard(r_value, r_array);
+  if (card_mark) {
+    // Free rl_index if its a temp. Ensures there are 2 free regs for card mark.
+    FreeTemp(rl_index.low_reg);
+    MarkGCCard(rl_src.low_reg, rl_array.low_reg);
   }
 }

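The GenArrayObjPut fast path removed above loaded the array's class and called the pCanPutArrayElement runtime helper before every reference store; with this change object-array stores go through the common GenArrayPut path and only append the card mark. The semantic check the helper stood for is roughly the following sketch; the Class type is a simplified stand-in (not ART's mirror::Class), and interface and array-of-array assignability are deliberately omitted:

#include <stdexcept>

struct Class {
  const Class* super;           // superclass chain, null at the root
  const Class* component_type;  // element type for array classes

  // Simplified assignability: walk only the superclass chain.
  bool IsAssignableFrom(const Class* src) const {
    for (const Class* c = src; c != nullptr; c = c->super) {
      if (c == this) return true;
    }
    return false;
  }
};

// aput-object check: storing null is always legal; otherwise the value's
// class must be assignable to the array's component type, else the runtime
// raises ArrayStoreException.
void CheckArrayStore(const Class* array_class, const Class* value_class) {
  if (value_class == nullptr) return;
  if (!array_class->component_type->IsAssignableFrom(value_class)) {
    throw std::runtime_error("ArrayStoreException");
  }
}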
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 26accab360..878fa769b6 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -85,6 +85,8 @@ int X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
     case kRet0: res = rX86_RET0; break;
     case kRet1: res = rX86_RET1; break;
     case kInvokeTgt: res = rX86_INVOKE_TGT; break;
+    case kHiddenArg: res = rAX; break;
+    case kHiddenFpArg: res = fr0; break;
     case kCount: res = rX86_COUNT; break;
   }
   return res;
@@ -132,37 +134,36 @@ uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
   return 0ULL;
 }

-void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir) {
+void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
   DCHECK_EQ(cu_->instruction_set, kX86);
+  DCHECK(!lir->flags.use_def_invalid);

   // X86-specific resource map setup here.
-  uint64_t flags = X86Mir2Lir::EncodingMap[lir->opcode].flags;
-
   if (flags & REG_USE_SP) {
-    lir->use_mask |= ENCODE_X86_REG_SP;
+    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
   }

   if (flags & REG_DEF_SP) {
-    lir->def_mask |= ENCODE_X86_REG_SP;
+    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
   }

   if (flags & REG_DEFA) {
-    SetupRegMask(&lir->def_mask, rAX);
+    SetupRegMask(&lir->u.m.def_mask, rAX);
   }

   if (flags & REG_DEFD) {
-    SetupRegMask(&lir->def_mask, rDX);
+    SetupRegMask(&lir->u.m.def_mask, rDX);
   }

   if (flags & REG_USEA) {
-    SetupRegMask(&lir->use_mask, rAX);
+    SetupRegMask(&lir->u.m.use_mask, rAX);
   }

   if (flags & REG_USEC) {
-    SetupRegMask(&lir->use_mask, rCX);
+    SetupRegMask(&lir->u.m.use_mask, rCX);
   }

   if (flags & REG_USED) {
-    SetupRegMask(&lir->use_mask, rDX);
+    SetupRegMask(&lir->u.m.use_mask, rDX);
   }
 }
@@ -224,7 +225,7 @@ std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char
       buf += StringPrintf("%d", operand);
       break;
     case 'p': {
-      SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(operand);
+      EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
       buf += StringPrintf("0x%08x", tab_rec->offset);
       break;
     }
@@ -239,7 +240,7 @@ std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char
       break;
     case 't':
       buf += StringPrintf("0x%08x (L%p)",
-                          reinterpret_cast<uint32_t>(base_addr)
+                          reinterpret_cast<uintptr_t>(base_addr)
                           + lir->offset + operand, lir->target);
       break;
     default:
@@ -275,8 +276,8 @@ void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix
   }
   /* Memory bits */
   if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
-    sprintf(buf + strlen(buf), "dr%d%s", x86LIR->alias_info & 0xffff,
-            (x86LIR->alias_info & 0x80000000) ? "(+1)" : "");
+    sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
+            (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
   }
   if (mask & ENCODE_LITERAL) {
     strcat(buf, "lit ");
@@ -375,11 +376,6 @@ RegLocation X86Mir2Lir::GetReturnAlt() {
   return res;
 }

-X86Mir2Lir::RegisterInfo* X86Mir2Lir::GetRegInfo(int reg) {
-  return X86_FPREG(reg) ? &reg_pool_->FPRegs[reg & X86_FP_REG_MASK]
-                        : &reg_pool_->core_regs[reg];
-}
-
 /* To be used when explicitly managing register use */
 void X86Mir2Lir::LockCallTemps() {
   LockTemp(rX86_ARG0);
@@ -530,14 +526,17 @@ int X86Mir2Lir::LoadHelper(ThreadOffset offset) {
 }

 uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
   return X86Mir2Lir::EncodingMap[opcode].flags;
 }

 const char* X86Mir2Lir::GetTargetInstName(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
   return X86Mir2Lir::EncodingMap[opcode].name;
 }

 const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
   return X86Mir2Lir::EncodingMap[opcode].fmt;
 }
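SetupTargetResourceMasks above now receives the instruction's flags from the caller and writes into the embedded u.m use/def masks: every fixed resource an x86 instruction implicitly reads or writes (EAX/ECX/EDX, the stack pointer) gets a bit in a 64-bit mask that the scheduler and optimizers consult for dependencies. A minimal sketch of the idea, with made-up bit assignments rather than ART's real encodings:

#include <cstdint>

// Assumed bit positions, for illustration only.
constexpr uint64_t kBitEAX = 1ULL << 0;
constexpr uint64_t kBitECX = 1ULL << 1;
constexpr uint64_t kBitEDX = 1ULL << 2;
constexpr uint64_t kBitESP = 1ULL << 3;

struct UseDefMasks {
  uint64_t use_mask = 0;  // resources the instruction reads
  uint64_t def_mask = 0;  // resources the instruction writes
};

// Example: a 32-bit div/idiv takes its dividend in EDX:EAX and leaves the
// quotient in EAX and the remainder in EDX, so both registers appear in the
// use mask and the def mask even though neither is an explicit operand.
void SetupDivMasks(UseDefMasks* m) {
  m->use_mask |= kBitEAX | kBitEDX;
  m->def_mask |= kBitEAX | kBitEDX;
}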
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index c519bfec44..6ec7ebb91a 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -117,6 +117,7 @@ LIR* X86Mir2Lir::OpReg(OpKind op, int r_dest_src) {
   switch (op) {
     case kOpNeg: opcode = kX86Neg32R; break;
     case kOpNot: opcode = kX86Not32R; break;
+    case kOpRev: opcode = kX86Bswap32R; break;
     case kOpBlx: opcode = kX86CallR; break;
     default:
       LOG(FATAL) << "Bad case in OpReg " << op;
@@ -161,6 +162,13 @@ LIR* X86Mir2Lir::OpRegReg(OpKind op, int r_dest_src1, int r_src2) {
     case kOpNeg:
      OpRegCopy(r_dest_src1, r_src2);
      return OpReg(kOpNeg, r_dest_src1);
+    case kOpRev:
+      OpRegCopy(r_dest_src1, r_src2);
+      return OpReg(kOpRev, r_dest_src1);
+    case kOpRevsh:
+      OpRegCopy(r_dest_src1, r_src2);
+      OpReg(kOpRev, r_dest_src1);
+      return OpRegImm(kOpAsr, r_dest_src1, 16);
       // X86 binary opcodes
     case kOpSub: opcode = kX86Sub32RR; break;
     case kOpSbc: opcode = kX86Sbb32RR; break;
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 643a3d5b8f..3518131cfe 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -243,7 +243,7 @@ enum X86OpCode {
   // - lir operands - 0: base, 1: disp, 2: immediate
   // AI - Array Immediate - opcode [base + index * scale + disp], #immediate
   // - lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
-  // TI - Thread Register - opcode fs:[disp], imm - where fs: is equal to Thread::Current()
+  // TI - Thread Immediate - opcode fs:[disp], imm - where fs: is equal to Thread::Current()
   // - lir operands - 0: disp, 1: imm
 #define BinaryOpCode(opcode) \
   opcode ## 8MR, opcode ## 8AR, opcode ## 8TR, \
@@ -313,6 +313,7 @@ enum X86OpCode {
   UnaryOpcode(kX86Imul, DaR, DaM, DaA),
   UnaryOpcode(kX86Divmod, DaR, DaM, DaA),
   UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
+  kX86Bswap32R,
 #undef UnaryOpcode
 #define Binary0fOpCode(opcode) \
   opcode ## RR, opcode ## RM, opcode ## RA
@@ -381,6 +382,7 @@ enum X86EncodingKind {
   kData,                                   // Special case for raw data.
   kNop,                                    // Special case for variable length nop.
   kNullary,                                // Opcode that takes no arguments.
+  kRegOpcode,                              // Shorter form of R instruction kind (opcode+rd)
   kReg, kMem, kArray,                      // R, M and A instruction kinds.
   kMemReg, kArrayReg, kThreadReg,          // MR, AR and TR instruction kinds.
   kRegReg, kRegMem, kRegArray, kRegThread, // RR, RM, RA and RT instruction kinds.