diff options
Diffstat (limited to 'src/compiler/codegen')
21 files changed, 822 insertions, 277 deletions
diff --git a/src/compiler/codegen/arm/arm_lir.h b/src/compiler/codegen/arm/arm_lir.h index 3fc87924cf..c41f53bf9a 100644 --- a/src/compiler/codegen/arm/arm_lir.h +++ b/src/compiler/codegen/arm/arm_lir.h @@ -371,7 +371,7 @@ enum ArmOpcode { kThumb2StrbRRI12, // strb rt,[rn,#imm12] [111110001000] rt[15..12] rn[19..16] imm12[11..0]. kThumb2Pop, // pop [1110100010111101] list[15-0]*/ kThumb2Push, // push [1110100100101101] list[15-0]*/ - kThumb2CmpRI8, // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0]. + kThumb2CmpRI12, // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0]. kThumb2AdcRRR, // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. kThumb2AndRRR, // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. kThumb2BicRRR, // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. @@ -445,6 +445,9 @@ enum ArmOpcode { kThumb2Pop1, // t3 encoding of pop. kThumb2RsubRRR, // rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. kThumb2Smull, // smull [111110111000] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0]. + kThumb2LdrdPcRel8, // ldrd rt, rt2, pc +-/1024. + kThumb2LdrdI8, // ldrd rt, rt2, [rn +-/1024]. + kThumb2StrdI8, // strd rt, rt2, [rn +-/1024]. kArmLast, }; diff --git a/src/compiler/codegen/arm/assemble_arm.cc b/src/compiler/codegen/arm/assemble_arm.cc index 91f25d68e6..455ea67577 100644 --- a/src/compiler/codegen/arm/assemble_arm.cc +++ b/src/compiler/codegen/arm/assemble_arm.cc @@ -646,7 +646,7 @@ const ArmEncodingMap ArmCodegen::EncodingMap[kArmLast] = { kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4), - ENCODING_MAP(kThumb2CmpRI8, 0xf1b00f00, + ENCODING_MAP(kThumb2CmpRI12, 0xf1b00f00, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES, @@ -917,8 +917,8 @@ const ArmEncodingMap ArmCodegen::EncodingMap[kArmLast] = { "b", "!0t", 4), ENCODING_MAP(kThumb2MovImm16H, 0xf2c00000, kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "movh", "!0C, #!1M", 4), + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0, + "movt", "!0C, #!1M", 4), ENCODING_MAP(kThumb2AddPCR, 0x4487, kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, @@ -936,8 +936,8 @@ const ArmEncodingMap ArmCodegen::EncodingMap[kArmLast] = { "mov", "!0C, #!1M", 4), ENCODING_MAP(kThumb2MovImm16HST, 0xf2c00000, kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP, - "movh", "!0C, #!1M", 4), + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP, + "movt", "!0C, #!1M", 4), ENCODING_MAP(kThumb2LdmiaWB, 0xe8b00000, kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, @@ -972,7 +972,21 @@ const ArmEncodingMap ArmCodegen::EncodingMap[kArmLast] = { kFmtBitBlt, 3, 0, IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3, "smull", "!0C, !1C, !2C, !3C", 4), - + ENCODING_MAP(kThumb2LdrdPcRel8, 0xe9df0000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, + "ldrd", "!0C, !1C, [pc, #!2E]", 4), + ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, + kFmtBitBlt, 7, 0, + IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD, + "ldrd", "!0C, !1C, [!2C, #!3E]", 4), + ENCODING_MAP(kThumb2StrdI8, 0xe9c00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, + kFmtBitBlt, 7, 0, + IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE, + "strd", "!0C, !1C, [!2C, #!3E]", 4), }; /* @@ -1023,13 +1037,14 @@ AssemblerStatus ArmCodegen::AssembleInstructions(CompilationUnit* cu, uintptr_t if (lir->opcode == kThumbLdrPcRel || lir->opcode == kThumb2LdrPcRel12 || lir->opcode == kThumbAddPcRel || + lir->opcode == kThumb2LdrdPcRel8 || ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) || ((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) { /* * PC-relative loads are mostly used to load immediates * that are too large to materialize directly in one shot. * However, if the load displacement exceeds the limit, - * we revert to a 2-instruction materialization sequence. + * we revert to a multiple-instruction materialization sequence. */ LIR *lir_target = lir->target; uintptr_t pc = (lir->offset + 4) & ~3; @@ -1044,8 +1059,9 @@ AssemblerStatus ArmCodegen::AssembleInstructions(CompilationUnit* cu, uintptr_t // Shouldn't happen in current codegen. LOG(FATAL) << "Unexpected pc-rel offset " << delta; } - // Now, check for the two difficult cases + // Now, check for the difficult cases if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) || + ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) || ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) || ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) { /* @@ -1053,26 +1069,34 @@ AssemblerStatus ArmCodegen::AssembleInstructions(CompilationUnit* cu, uintptr_t * vldrs/vldrd we include REG_DEF_LR in the resource * masks for these instructions. */ - int base_reg = (lir->opcode == kThumb2LdrPcRel12) ? - lir->operands[0] : rARM_LR; + int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) || (lir->opcode == kThumb2LdrPcRel12)) + ? lir->operands[0] : rARM_LR; - // Add new Adr to generate the address + // Add new Adr to generate the address. LIR* new_adr = RawLIR(cu, lir->dalvik_offset, kThumb2Adr, base_reg, 0, 0, 0, 0, lir->target); InsertLIRBefore(lir, new_adr); - // Convert to normal load + // Convert to normal load. if (lir->opcode == kThumb2LdrPcRel12) { lir->opcode = kThumb2LdrRRI12; + } else if (lir->opcode == kThumb2LdrdPcRel8) { + lir->opcode = kThumb2LdrdI8; + } + // Change the load to be relative to the new Adr base. + if (lir->opcode == kThumb2LdrdI8) { + lir->operands[3] = 0; + lir->operands[2] = base_reg; + } else { + lir->operands[2] = 0; + lir->operands[1] = base_reg; } - // Change the load to be relative to the new Adr base - lir->operands[1] = base_reg; - lir->operands[2] = 0; SetupResourceMasks(cu, lir); res = kRetryAll; } else { if ((lir->opcode == kThumb2Vldrs) || - (lir->opcode == kThumb2Vldrd)) { + (lir->opcode == kThumb2Vldrd) || + (lir->opcode == kThumb2LdrdPcRel8)) { lir->operands[2] = delta >> 2; } else { lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ? delta : diff --git a/src/compiler/codegen/arm/codegen_arm.h b/src/compiler/codegen/arm/codegen_arm.h index ea34ff2b26..4dadd6c821 100644 --- a/src/compiler/codegen/arm/codegen_arm.h +++ b/src/compiler/codegen/arm/codegen_arm.h @@ -37,8 +37,7 @@ class ArmCodegen : public Codegen { int displacement, int r_dest, int r_dest_hi, OpSize size, int s_reg); virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value); - virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, - int val_lo, int val_hi); + virtual LIR* LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value); virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src, OpSize size); virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo, @@ -89,12 +88,18 @@ class ArmCodegen : public Codegen { virtual bool IsUnconditionalBranch(LIR* lir); // Required for target - Dalvik-level generators. + virtual bool GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array, RegLocation rl_index, RegLocation rl_src, int scale); virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale); virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_src, int scale); + virtual bool GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift); + virtual void GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, @@ -197,7 +202,14 @@ class ArmCodegen : public Codegen { static int EncodeShift(int code, int amount); static int ModifiedImmediate(uint32_t value); static ArmConditionCode ArmConditionEncoding(ConditionCode code); - bool InexpensiveConstant(int reg, int value); + bool InexpensiveConstantInt(int32_t value); + bool InexpensiveConstantFloat(int32_t value); + bool InexpensiveConstantLong(int64_t value); + bool InexpensiveConstantDouble(int64_t value); + + private: + void GenFusedLongCmpImmBranch(CompilationUnit* cu, BasicBlock* bb, RegLocation rl_src1, + int64_t val, ConditionCode ccode); }; } // namespace art diff --git a/src/compiler/codegen/arm/int_arm.cc b/src/compiler/codegen/arm/int_arm.cc index fcf74f1471..5a9786c0d6 100644 --- a/src/compiler/codegen/arm/int_arm.cc +++ b/src/compiler/codegen/arm/int_arm.cc @@ -121,16 +121,81 @@ void ArmCodegen::GenCmpLong(CompilationUnit* cu, RegLocation rl_dest, RegLocatio branch3->target = branch1->target; } -void ArmCodegen::GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir) +void ArmCodegen::GenFusedLongCmpImmBranch(CompilationUnit* cu, BasicBlock* bb, RegLocation rl_src1, + int64_t val, ConditionCode ccode) { + int32_t val_lo = Low32Bits(val); + int32_t val_hi = High32Bits(val); + DCHECK(ModifiedImmediate(val_lo) >= 0); + DCHECK(ModifiedImmediate(val_hi) >= 0); LIR* label_list = cu->block_label_list; LIR* taken = &label_list[bb->taken->id]; LIR* not_taken = &label_list[bb->fall_through->id]; + rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg); + int32_t low_reg = rl_src1.low_reg; + int32_t high_reg = rl_src1.high_reg; + + switch(ccode) { + case kCondEq: + OpCmpImmBranch(cu, kCondNe, high_reg, val_hi, not_taken); + break; + case kCondNe: + OpCmpImmBranch(cu, kCondNe, high_reg, val_hi, taken); + break; + case kCondLt: + OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, taken); + OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, not_taken); + ccode = kCondCc; + break; + case kCondLe: + OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, taken); + OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, not_taken); + ccode = kCondLs; + break; + case kCondGt: + OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, taken); + OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, not_taken); + ccode = kCondHi; + break; + case kCondGe: + OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, taken); + OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, not_taken); + ccode = kCondCs; + break; + default: + LOG(FATAL) << "Unexpected ccode: " << ccode; + } + OpCmpImmBranch(cu, ccode, low_reg, val_lo, taken); +} + + +void ArmCodegen::GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir) +{ RegLocation rl_src1 = GetSrcWide(cu, mir, 0); RegLocation rl_src2 = GetSrcWide(cu, mir, 2); + // Normalize such that if either operand is constant, src2 will be constant. + ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]); + if (rl_src1.is_const) { + RegLocation rl_temp = rl_src1; + rl_src1 = rl_src2; + rl_src2 = rl_temp; + ccode = FlipComparisonOrder(ccode); + } + if (rl_src2.is_const) { + RegLocation rl_temp = UpdateLocWide(cu, rl_src2); + // Do special compare/branch against simple const operand if not already in registers. + int64_t val = ConstantValueWide(cu, rl_src2); + if ((rl_temp.location != kLocPhysReg) && + ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) { + GenFusedLongCmpImmBranch(cu, bb, rl_src1, val, ccode); + return; + } + } + LIR* label_list = cu->block_label_list; + LIR* taken = &label_list[bb->taken->id]; + LIR* not_taken = &label_list[bb->fall_through->id]; rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg); rl_src2 = LoadValueWide(cu, rl_src2, kCoreReg); - ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]); OpRegReg(cu, kOpCmp, rl_src1.high_reg, rl_src2.high_reg); switch(ccode) { case kCondEq: @@ -185,7 +250,7 @@ LIR* ArmCodegen::OpCmpImmBranch(CompilationUnit* cu, ConditionCode cond, int reg if (ARM_LOWREG(reg) && ((check_value & 0xff) == check_value)) { NewLIR2(cu, kThumbCmpRI8, reg, check_value); } else if (mod_imm >= 0) { - NewLIR2(cu, kThumb2CmpRI8, reg, mod_imm); + NewLIR2(cu, kThumb2CmpRI12, reg, mod_imm); } else { int t_reg = AllocTemp(cu); LoadConstant(cu, t_reg, check_value); @@ -523,6 +588,93 @@ bool ArmCodegen::GenNegLong(CompilationUnit* cu, RegLocation rl_dest, RegLocatio return false; } + + /* + * Check to see if a result pair has a misaligned overlap with an operand pair. This + * is not usual for dx to generate, but it is legal (for now). In a future rev of + * dex, we'll want to make this case illegal. + */ +static bool BadOverlap(CompilationUnit* cu, RegLocation rl_src, RegLocation rl_dest) +{ + DCHECK(rl_src.wide); + DCHECK(rl_dest.wide); + return (abs(SRegToVReg(cu, rl_src.s_reg_low) - SRegToVReg(cu, rl_dest.s_reg_low)) == 1); +} + +void ArmCodegen::GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + /* + * To pull off inline multiply, we have a worst-case requirement of 8 temporary + * registers. Normally for Arm, we get 5. We can get to 6 by including + * lr in the temp set. The only problematic case is all operands and result are + * distinct, and none have been promoted. In that case, we can succeed by aggressively + * freeing operand temp registers after they are no longer needed. All other cases + * can proceed normally. We'll just punt on the case of the result having a misaligned + * overlap with either operand and send that case to a runtime handler. + */ + RegLocation rl_result; + if (BadOverlap(cu, rl_src1, rl_dest) || (BadOverlap(cu, rl_src2, rl_dest))) { + int func_offset = ENTRYPOINT_OFFSET(pLmul); + FlushAllRegs(cu); + CallRuntimeHelperRegLocationRegLocation(cu, func_offset, rl_src1, rl_src2, false); + rl_result = GetReturnWide(cu, false); + StoreValueWide(cu, rl_dest, rl_result); + return; + } + // Temporarily add LR to the temp pool, and assign it to tmp1 + MarkTemp(cu, rARM_LR); + FreeTemp(cu, rARM_LR); + int tmp1 = rARM_LR; + LockTemp(cu, rARM_LR); + + rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg); + rl_src2 = LoadValueWide(cu, rl_src2, kCoreReg); + + bool special_case = true; + // If operands are the same, or any pair has been promoted we're not the special case. + if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || + (!IsTemp(cu, rl_src1.low_reg) && !IsTemp(cu, rl_src1.high_reg)) || + (!IsTemp(cu, rl_src2.low_reg) && !IsTemp(cu, rl_src2.high_reg))) { + special_case = false; + } + // Tuning: if rl_dest has been promoted and is *not* either operand, could use directly. + int res_lo = AllocTemp(cu); + int res_hi; + if (rl_src1.low_reg == rl_src2.low_reg) { + res_hi = AllocTemp(cu); + NewLIR3(cu, kThumb2MulRRR, tmp1, rl_src1.low_reg, rl_src1.high_reg); + NewLIR4(cu, kThumb2Umull, res_lo, res_hi, rl_src1.low_reg, rl_src1.low_reg); + OpRegRegRegShift(cu, kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1)); + } else { + // In the special case, all temps are now allocated + NewLIR3(cu, kThumb2MulRRR, tmp1, rl_src2.low_reg, rl_src1.high_reg); + if (special_case) { + DCHECK_NE(rl_src1.low_reg, rl_src2.low_reg); + DCHECK_NE(rl_src1.high_reg, rl_src2.high_reg); + FreeTemp(cu, rl_src1.high_reg); + } + res_hi = AllocTemp(cu); + + NewLIR4(cu, kThumb2Umull, res_lo, res_hi, rl_src2.low_reg, rl_src1.low_reg); + NewLIR4(cu, kThumb2Mla, tmp1, rl_src1.low_reg, rl_src2.high_reg, tmp1); + NewLIR4(cu, kThumb2AddRRR, res_hi, tmp1, res_hi, 0); + if (special_case) { + FreeTemp(cu, rl_src1.low_reg); + Clobber(cu, rl_src1.low_reg); + Clobber(cu, rl_src1.high_reg); + } + } + FreeTemp(cu, tmp1); + rl_result = GetReturnWide(cu, false); // Just using as a template. + rl_result.low_reg = res_lo; + rl_result.high_reg = res_hi; + StoreValueWide(cu, rl_dest, rl_result); + // Now, restore lr to its non-temp status. + Clobber(cu, rARM_LR); + UnmarkTemp(cu, rARM_LR); +} + bool ArmCodegen::GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { @@ -568,8 +720,11 @@ void ArmCodegen::GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, Re int len_offset = mirror::Array::LengthOffset().Int32Value(); int data_offset; RegLocation rl_result; + bool constant_index = rl_index.is_const; rl_array = LoadValue(cu, rl_array, kCoreReg); - rl_index = LoadValue(cu, rl_index, kCoreReg); + if (!constant_index) { + rl_index = LoadValue(cu, rl_index, kCoreReg); + } if (rl_dest.wide) { data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); @@ -577,6 +732,11 @@ void ArmCodegen::GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, Re data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); } + // If index is constant, just fold it into the data offset + if (constant_index) { + data_offset += ConstantValue(cu, rl_index) << scale; + } + /* null object? */ GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags); @@ -587,27 +747,38 @@ void ArmCodegen::GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, Re /* Get len */ LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len); } - if (rl_dest.wide || rl_dest.fp) { - // No special indexed operation, lea + load w/ displacement - int reg_ptr = AllocTemp(cu); - OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg, - EncodeShift(kArmLsl, scale)); - FreeTemp(cu, rl_index.low_reg); + if (rl_dest.wide || rl_dest.fp || constant_index) { + int reg_ptr; + if (constant_index) { + reg_ptr = rl_array.low_reg; // NOTE: must not alter reg_ptr in constant case. + } else { + // No special indexed operation, lea + load w/ displacement + reg_ptr = AllocTemp(cu); + OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg, + EncodeShift(kArmLsl, scale)); + FreeTemp(cu, rl_index.low_reg); + } rl_result = EvalLoc(cu, rl_dest, reg_class, true); if (needs_range_check) { - // TODO: change kCondCS to a more meaningful name, is the sense of - // carry-set/clear flipped? - GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds); + if (constant_index) { + GenImmedCheck(cu, kCondLs, reg_len, ConstantValue(cu, rl_index), kThrowConstantArrayBounds); + } else { + GenRegRegCheck(cu, kCondLs, reg_len, rl_index.low_reg, kThrowArrayBounds); + } FreeTemp(cu, reg_len); } if (rl_dest.wide) { LoadBaseDispWide(cu, reg_ptr, data_offset, rl_result.low_reg, rl_result.high_reg, INVALID_SREG); - FreeTemp(cu, reg_ptr); + if (!constant_index) { + FreeTemp(cu, reg_ptr); + } StoreValueWide(cu, rl_dest, rl_result); } else { LoadBaseDisp(cu, reg_ptr, data_offset, rl_result.low_reg, size, INVALID_SREG); - FreeTemp(cu, reg_ptr); + if (!constant_index) { + FreeTemp(cu, reg_ptr); + } StoreValue(cu, rl_dest, rl_result); } } else { @@ -639,17 +810,28 @@ void ArmCodegen::GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, Re RegisterClass reg_class = oat_reg_class_by_size(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); int data_offset; + bool constant_index = rl_index.is_const; - if (size == kLong || size == kDouble) { + if (rl_src.wide) { data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); } else { data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); } + // If index is constant, just fold it into the data offset. + if (constant_index) { + data_offset += ConstantValue(cu, rl_index) << scale; + } + rl_array = LoadValue(cu, rl_array, kCoreReg); - rl_index = LoadValue(cu, rl_index, kCoreReg); - int reg_ptr = INVALID_REG; - if (IsTemp(cu, rl_array.low_reg)) { + if (!constant_index) { + rl_index = LoadValue(cu, rl_index, kCoreReg); + } + + int reg_ptr; + if (constant_index) { + reg_ptr = rl_array.low_reg; + } else if (IsTemp(cu, rl_array.low_reg)) { Clobber(cu, rl_array.low_reg); reg_ptr = rl_array.low_reg; } else { @@ -668,18 +850,25 @@ void ArmCodegen::GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, Re LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len); } /* at this point, reg_ptr points to array, 2 live temps */ - if (rl_src.wide || rl_src.fp) { + if (rl_src.wide || rl_src.fp || constant_index) { if (rl_src.wide) { rl_src = LoadValueWide(cu, rl_src, reg_class); } else { rl_src = LoadValue(cu, rl_src, reg_class); } - OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg, - EncodeShift(kArmLsl, scale)); + if (!constant_index) { + OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg, + EncodeShift(kArmLsl, scale)); + } if (needs_range_check) { - GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds); + if (constant_index) { + GenImmedCheck(cu, kCondLs, reg_len, ConstantValue(cu, rl_index), kThrowConstantArrayBounds); + } else { + GenRegRegCheck(cu, kCondLs, reg_len, rl_index.low_reg, kThrowArrayBounds); + } FreeTemp(cu, reg_len); } + if (rl_src.wide) { StoreBaseDispWide(cu, reg_ptr, data_offset, rl_src.low_reg, rl_src.high_reg); } else { @@ -696,7 +885,9 @@ void ArmCodegen::GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, Re StoreBaseIndexed(cu, reg_ptr, rl_index.low_reg, rl_src.low_reg, scale, size); } - FreeTemp(cu, reg_ptr); + if (!constant_index) { + FreeTemp(cu, reg_ptr); + } } /* @@ -758,4 +949,163 @@ void ArmCodegen::GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation MarkGCCard(cu, r_value, r_array); } +bool ArmCodegen::GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) +{ + rl_src = LoadValueWide(cu, rl_src, kCoreReg); + // Per spec, we only care about low 6 bits of shift amount. + int shift_amount = ConstantValue(cu, rl_shift) & 0x3f; + if (shift_amount == 0) { + StoreValueWide(cu, rl_dest, rl_src); + return false; // TODO: remove useless bool return result. + } + if (BadOverlap(cu, rl_src, rl_dest)) { + return GenShiftOpLong(cu, opcode, rl_dest, rl_src, rl_shift); + } + RegLocation rl_result = EvalLoc(cu, rl_dest, kCoreReg, true); + switch(opcode) { + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + if (shift_amount == 1) { + OpRegRegReg(cu, kOpAdd, rl_result.low_reg, rl_src.low_reg, rl_src.low_reg); + OpRegRegReg(cu, kOpAdc, rl_result.high_reg, rl_src.high_reg, rl_src.high_reg); + } else if (shift_amount == 32) { + OpRegCopy(cu, rl_result.high_reg, rl_src.low_reg); + LoadConstant(cu, rl_result.low_reg, 0); + } else if (shift_amount > 31) { + OpRegRegImm(cu, kOpLsl, rl_result.high_reg, rl_src.low_reg, shift_amount - 32); + LoadConstant(cu, rl_result.low_reg, 0); + } else { + OpRegRegImm(cu, kOpLsl, rl_result.high_reg, rl_src.high_reg, shift_amount); + OpRegRegRegShift(cu, kOpOr, rl_result.high_reg, rl_result.high_reg, rl_src.low_reg, + EncodeShift(kArmLsr, 32 - shift_amount)); + OpRegRegImm(cu, kOpLsl, rl_result.low_reg, rl_src.low_reg, shift_amount); + } + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(cu, rl_result.low_reg, rl_src.high_reg); + OpRegRegImm(cu, kOpAsr, rl_result.high_reg, rl_src.high_reg, 31); + } else if (shift_amount > 31) { + OpRegRegImm(cu, kOpAsr, rl_result.low_reg, rl_src.high_reg, shift_amount - 32); + OpRegRegImm(cu, kOpAsr, rl_result.high_reg, rl_src.high_reg, 31); + } else { + int t_reg = AllocTemp(cu); + OpRegRegImm(cu, kOpLsr, t_reg, rl_src.low_reg, shift_amount); + OpRegRegRegShift(cu, kOpOr, rl_result.low_reg, t_reg, rl_src.high_reg, + EncodeShift(kArmLsl, 32 - shift_amount)); + FreeTemp(cu, t_reg); + OpRegRegImm(cu, kOpAsr, rl_result.high_reg, rl_src.high_reg, shift_amount); + } + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(cu, rl_result.low_reg, rl_src.high_reg); + LoadConstant(cu, rl_result.high_reg, 0); + } else if (shift_amount > 31) { + OpRegRegImm(cu, kOpLsr, rl_result.low_reg, rl_src.high_reg, shift_amount - 32); + LoadConstant(cu, rl_result.high_reg, 0); + } else { + int t_reg = AllocTemp(cu); + OpRegRegImm(cu, kOpLsr, t_reg, rl_src.low_reg, shift_amount); + OpRegRegRegShift(cu, kOpOr, rl_result.low_reg, t_reg, rl_src.high_reg, + EncodeShift(kArmLsl, 32 - shift_amount)); + FreeTemp(cu, t_reg); + OpRegRegImm(cu, kOpLsr, rl_result.high_reg, rl_src.high_reg, shift_amount); + } + break; + default: + LOG(FATAL) << "Unexpected case"; + return true; + } + StoreValueWide(cu, rl_dest, rl_result); + return false; +} + +bool ArmCodegen::GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) +{ + if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) { + if (!rl_src2.is_const) { + // Don't bother with special handling for subtract from immediate. + return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2); + } + } else { + // Normalize + if (!rl_src2.is_const) { + DCHECK(rl_src1.is_const); + RegLocation rl_temp = rl_src1; + rl_src1 = rl_src2; + rl_src2 = rl_temp; + } + } + if (BadOverlap(cu, rl_src1, rl_dest)) { + return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2); + } + DCHECK(rl_src2.is_const); + int64_t val = ConstantValueWide(cu, rl_src2); + uint32_t val_lo = Low32Bits(val); + uint32_t val_hi = High32Bits(val); + int32_t mod_imm_lo = ModifiedImmediate(val_lo); + int32_t mod_imm_hi = ModifiedImmediate(val_hi); + + // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit + switch(opcode) { + case Instruction::ADD_LONG: + case Instruction::ADD_LONG_2ADDR: + case Instruction::SUB_LONG: + case Instruction::SUB_LONG_2ADDR: + if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) { + return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2); + } + break; + default: + break; + } + rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg); + RegLocation rl_result = EvalLoc(cu, rl_dest, kCoreReg, true); + // NOTE: once we've done the EvalLoc on dest, we can no longer bail. + switch (opcode) { + case Instruction::ADD_LONG: + case Instruction::ADD_LONG_2ADDR: + NewLIR3(cu, kThumb2AddRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo); + NewLIR3(cu, kThumb2AdcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi); + break; + case Instruction::OR_LONG: + case Instruction::OR_LONG_2ADDR: + if ((val_lo != 0) || (rl_result.low_reg != rl_src1.low_reg)) { + OpRegRegImm(cu, kOpOr, rl_result.low_reg, rl_src1.low_reg, val_lo); + } + if ((val_hi != 0) || (rl_result.high_reg != rl_src1.high_reg)) { + OpRegRegImm(cu, kOpOr, rl_result.high_reg, rl_src1.high_reg, val_hi); + } + break; + case Instruction::XOR_LONG: + case Instruction::XOR_LONG_2ADDR: + OpRegRegImm(cu, kOpXor, rl_result.low_reg, rl_src1.low_reg, val_lo); + OpRegRegImm(cu, kOpXor, rl_result.high_reg, rl_src1.high_reg, val_hi); + break; + case Instruction::AND_LONG: + case Instruction::AND_LONG_2ADDR: + if ((val_lo != 0xffffffff) || (rl_result.low_reg != rl_src1.low_reg)) { + OpRegRegImm(cu, kOpAnd, rl_result.low_reg, rl_src1.low_reg, val_lo); + } + if ((val_hi != 0xffffffff) || (rl_result.high_reg != rl_src1.high_reg)) { + OpRegRegImm(cu, kOpAnd, rl_result.high_reg, rl_src1.high_reg, val_hi); + } + break; + case Instruction::SUB_LONG_2ADDR: + case Instruction::SUB_LONG: + NewLIR3(cu, kThumb2SubRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo); + NewLIR3(cu, kThumb2SbcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi); + break; + default: + LOG(FATAL) << "Unexpected opcode " << opcode; + } + StoreValueWide(cu, rl_dest, rl_result); + return false; // TODO: remove bool return value from all of these Gen routines. +} + } // namespace art diff --git a/src/compiler/codegen/arm/utility_arm.cc b/src/compiler/codegen/arm/utility_arm.cc index 433111c528..a670199580 100644 --- a/src/compiler/codegen/arm/utility_arm.cc +++ b/src/compiler/codegen/arm/utility_arm.cc @@ -45,6 +45,32 @@ static int EncodeImmSingle(int value) return res; } +/* + * Determine whether value can be encoded as a Thumb2 floating point + * immediate. If not, return -1. If so return encoded 8-bit value. + */ +static int EncodeImmDouble(int64_t value) +{ + int res; + int bit_a = (value & 0x8000000000000000ll) >> 63; + int not_bit_b = (value & 0x4000000000000000ll) >> 62; + int bit_b = (value & 0x2000000000000000ll) >> 61; + int b_smear = (value & 0x3fc0000000000000ll) >> 54; + int slice = (value & 0x003f000000000000ll) >> 48; + uint64_t zeroes = (value & 0x0000ffffffffffffll); + if (zeroes != 0) + return -1; + if (bit_b) { + if ((not_bit_b != 0) || (b_smear != 0xff)) + return -1; + } else { + if ((not_bit_b != 1) || (b_smear != 0x0)) + return -1; + } + res = (bit_a << 7) | (bit_b << 6) | slice; + return res; +} + static LIR* LoadFPConstantValue(CompilationUnit* cu, int r_dest, int value) { DCHECK(ARM_SINGLEREG(r_dest)); @@ -126,19 +152,24 @@ int ArmCodegen::ModifiedImmediate(uint32_t value) return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */ } -bool ArmCodegen::InexpensiveConstant(int reg, int value) +bool ArmCodegen::InexpensiveConstantInt(int32_t value) { - bool res = false; - if (ARM_FPREG(reg)) { - res = (EncodeImmSingle(value) >= 0); - } else { - if (ARM_LOWREG(reg) && (value >= 0) && (IsUint(8, value))) { - res = true; - } else { - res = (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0); - } - } - return res; + return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0); +} + +bool ArmCodegen::InexpensiveConstantFloat(int32_t value) +{ + return EncodeImmSingle(value) >= 0; +} + +bool ArmCodegen::InexpensiveConstantLong(int64_t value) +{ + return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value)); +} + +bool ArmCodegen::InexpensiveConstantDouble(int64_t value) +{ + return EncodeImmDouble(value) >= 0; } /* @@ -178,25 +209,9 @@ LIR* ArmCodegen::LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int valu res = NewLIR2(cu, kThumb2MovImm16, r_dest, value); return res; } - /* No shortcut - go ahead and use literal pool */ - LIR* data_target = ScanLiteralPool(cu->literal_list, value, 0); - if (data_target == NULL) { - data_target = AddWordData(cu, &cu->literal_list, value); - } - LIR* load_pc_rel = RawLIR(cu, cu->current_dalvik_offset, - kThumb2LdrPcRel12, r_dest, 0, 0, 0, 0, data_target); - SetMemRefType(cu, load_pc_rel, true, kLiteral); - load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target); - res = load_pc_rel; - AppendLIR(cu, load_pc_rel); - - /* - * To save space in the constant pool, we use the ADD_RRI8 instruction to - * add up to 255 to an existing constant value. - */ - if (data_target->operands[0] != value) { - OpRegImm(cu, kOpAdd, r_dest, value - data_target->operands[0]); - } + /* Do a low/high pair */ + res = NewLIR2(cu, kThumb2MovImm16, r_dest, Low16Bits(value)); + NewLIR2(cu, kThumb2MovImm16H, r_dest, High16Bits(value)); return res; } @@ -514,7 +529,7 @@ LIR* ArmCodegen::OpRegRegImm(CompilationUnit* cu, OpKind op, int r_dest, int r_s int mod_imm = ModifiedImmediate(value); LIR* res; if (mod_imm >= 0) { - res = NewLIR2(cu, kThumb2CmpRI8, r_src1, mod_imm); + res = NewLIR2(cu, kThumb2CmpRI12, r_src1, mod_imm); } else { int r_tmp = AllocTemp(cu); res = LoadConstant(cu, r_tmp, value); @@ -587,44 +602,11 @@ LIR* ArmCodegen::OpRegImm(CompilationUnit* cu, OpKind op, int r_dest_src1, int v } } -/* - * Determine whether value can be encoded as a Thumb2 floating point - * immediate. If not, return -1. If so return encoded 8-bit value. - */ -static int EncodeImmDoubleHigh(int value) +LIR* ArmCodegen::LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value) { - int res; - int bit_a = (value & 0x80000000) >> 31; - int not_bit_b = (value & 0x40000000) >> 30; - int bit_b = (value & 0x20000000) >> 29; - int b_smear = (value & 0x3fc00000) >> 22; - int slice = (value & 0x003f0000) >> 16; - int zeroes = (value & 0x0000ffff); - if (zeroes != 0) - return -1; - if (bit_b) { - if ((not_bit_b != 0) || (b_smear != 0xff)) - return -1; - } else { - if ((not_bit_b != 1) || (b_smear != 0x0)) - return -1; - } - res = (bit_a << 7) | (bit_b << 6) | slice; - return res; -} - -static int EncodeImmDouble(int val_lo, int val_hi) -{ - int res = -1; - if (val_lo == 0) - res = EncodeImmDoubleHigh(val_hi); - return res; -} - -LIR* ArmCodegen::LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, - int val_lo, int val_hi) -{ - LIR* res; + LIR* res = NULL; + int32_t val_lo = Low32Bits(value); + int32_t val_hi = High32Bits(value); int target_reg = S2d(r_dest_lo, r_dest_hi); if (ARM_FPREG(r_dest_lo)) { if ((val_lo == 0) && (val_hi == 0)) { @@ -635,26 +617,33 @@ LIR* ArmCodegen::LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r // +0.0 = +2.0 - +2.0 res = NewLIR3(cu, kThumb2Vsubd, target_reg, target_reg, target_reg); } else { - int encoded_imm = EncodeImmDouble(val_lo, val_hi); + int encoded_imm = EncodeImmDouble(value); if (encoded_imm >= 0) { res = NewLIR2(cu, kThumb2Vmovd_IMM8, target_reg, encoded_imm); - } else { - LIR* data_target = ScanLiteralPoolWide(cu->literal_list, val_lo, val_hi); - if (data_target == NULL) { - data_target = AddWideData(cu, &cu->literal_list, val_lo, val_hi); - } - LIR* load_pc_rel = - RawLIR(cu, cu->current_dalvik_offset, kThumb2Vldrd, - target_reg, r15pc, 0, 0, 0, data_target); - SetMemRefType(cu, load_pc_rel, true, kLiteral); - load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target); - AppendLIR(cu, load_pc_rel); - res = load_pc_rel; } } } else { - res = LoadConstantNoClobber(cu, r_dest_lo, val_lo); - LoadConstantNoClobber(cu, r_dest_hi, val_hi); + if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) { + res = LoadConstantNoClobber(cu, r_dest_lo, val_lo); + LoadConstantNoClobber(cu, r_dest_hi, val_hi); + } + } + if (res == NULL) { + // No short form - load from the literal pool. + LIR* data_target = ScanLiteralPoolWide(cu->literal_list, val_lo, val_hi); + if (data_target == NULL) { + data_target = AddWideData(cu, &cu->literal_list, val_lo, val_hi); + } + if (ARM_FPREG(r_dest_lo)) { + res = RawLIR(cu, cu->current_dalvik_offset, kThumb2Vldrd, + target_reg, r15pc, 0, 0, 0, data_target); + } else { + res = RawLIR(cu, cu->current_dalvik_offset, kThumb2LdrdPcRel8, + r_dest_lo, r_dest_hi, r15pc, 0, 0, data_target); + } + SetMemRefType(cu, res, true, kLiteral); + res->alias_info = reinterpret_cast<uintptr_t>(data_target); + AppendLIR(cu, res); } return res; } @@ -732,7 +721,7 @@ LIR* ArmCodegen::StoreBaseIndexed(CompilationUnit* cu, int rBase, int r_index, i int scale, OpSize size) { bool all_low_regs = ARM_LOWREG(rBase) && ARM_LOWREG(r_index) && ARM_LOWREG(r_src); - LIR* store; + LIR* store = NULL; ArmOpcode opcode = kThumbBkpt; bool thumb_form = (all_low_regs && (scale == 0)); int reg_ptr; @@ -798,14 +787,14 @@ LIR* ArmCodegen::LoadBaseDispBody(CompilationUnit* cu, int rBase, int displaceme int r_dest_hi, OpSize size, int s_reg) { Codegen* cg = cu->cg.get(); - LIR* res; - LIR* load; + LIR* load = NULL; ArmOpcode opcode = kThumbBkpt; bool short_form = false; bool thumb2Form = (displacement < 4092 && displacement >= 0); bool all_low_regs = (ARM_LOWREG(rBase) && ARM_LOWREG(r_dest)); int encoded_disp = displacement; bool is64bit = false; + bool already_generated = false; switch (size) { case kDouble: case kLong: @@ -822,11 +811,15 @@ LIR* ArmCodegen::LoadBaseDispBody(CompilationUnit* cu, int rBase, int displaceme } break; } else { - res = LoadBaseDispBody(cu, rBase, displacement, r_dest, - -1, kWord, s_reg); - LoadBaseDispBody(cu, rBase, displacement + 4, r_dest_hi, - -1, kWord, INVALID_SREG); - return res; + if (displacement <= 1020) { + load = NewLIR4(cu, kThumb2LdrdI8, r_dest, r_dest_hi, rBase, displacement >> 2); + } else { + load = LoadBaseDispBody(cu, rBase, displacement, r_dest, + -1, kWord, s_reg); + LoadBaseDispBody(cu, rBase, displacement + 4, r_dest_hi, + -1, kWord, INVALID_SREG); + } + already_generated = true; } case kSingle: case kWord: @@ -894,13 +887,15 @@ LIR* ArmCodegen::LoadBaseDispBody(CompilationUnit* cu, int rBase, int displaceme LOG(FATAL) << "Bad size: " << size; } - if (short_form) { - load = res = NewLIR3(cu, opcode, r_dest, rBase, encoded_disp); - } else { - int reg_offset = AllocTemp(cu); - res = cg->LoadConstant(cu, reg_offset, encoded_disp); - load = cg->LoadBaseIndexed(cu, rBase, reg_offset, r_dest, 0, size); - FreeTemp(cu, reg_offset); + if (!already_generated) { + if (short_form) { + load = NewLIR3(cu, opcode, r_dest, rBase, encoded_disp); + } else { + int reg_offset = AllocTemp(cu); + cg->LoadConstant(cu, reg_offset, encoded_disp); + load = cg->LoadBaseIndexed(cu, rBase, reg_offset, r_dest, 0, size); + FreeTemp(cu, reg_offset); + } } // TODO: in future may need to differentiate Dalvik accesses w/ spills @@ -926,30 +921,36 @@ LIR* ArmCodegen::LoadBaseDispWide(CompilationUnit* cu, int rBase, int displaceme LIR* ArmCodegen::StoreBaseDispBody(CompilationUnit* cu, int rBase, int displacement, int r_src, int r_src_hi, OpSize size) { Codegen* cg = cu->cg.get(); - LIR* res, *store; + LIR* store = NULL; ArmOpcode opcode = kThumbBkpt; bool short_form = false; bool thumb2Form = (displacement < 4092 && displacement >= 0); bool all_low_regs = (ARM_LOWREG(rBase) && ARM_LOWREG(r_src)); int encoded_disp = displacement; bool is64bit = false; + bool already_generated = false; switch (size) { case kLong: case kDouble: is64bit = true; if (!ARM_FPREG(r_src)) { - res = StoreBaseDispBody(cu, rBase, displacement, r_src, -1, kWord); - StoreBaseDispBody(cu, rBase, displacement + 4, r_src_hi, -1, kWord); - return res; - } - if (ARM_SINGLEREG(r_src)) { - DCHECK(ARM_FPREG(r_src_hi)); - r_src = cg->S2d(r_src, r_src_hi); - } - opcode = kThumb2Vstrd; - if (displacement <= 1020) { - short_form = true; - encoded_disp >>= 2; + if (displacement <= 1020) { + store = NewLIR4(cu, kThumb2StrdI8, r_src, r_src_hi, rBase, displacement >> 2); + } else { + store = StoreBaseDispBody(cu, rBase, displacement, r_src, -1, kWord); + StoreBaseDispBody(cu, rBase, displacement + 4, r_src_hi, -1, kWord); + } + already_generated = true; + } else { + if (ARM_SINGLEREG(r_src)) { + DCHECK(ARM_FPREG(r_src_hi)); + r_src = cg->S2d(r_src, r_src_hi); + } + opcode = kThumb2Vstrd; + if (displacement <= 1020) { + short_form = true; + encoded_disp >>= 2; + } } break; case kSingle: @@ -998,20 +999,22 @@ LIR* ArmCodegen::StoreBaseDispBody(CompilationUnit* cu, int rBase, int displacem default: LOG(FATAL) << "Bad size: " << size; } - if (short_form) { - store = res = NewLIR3(cu, opcode, r_src, rBase, encoded_disp); - } else { - int r_scratch = AllocTemp(cu); - res = cg->LoadConstant(cu, r_scratch, encoded_disp); - store = cg->StoreBaseIndexed(cu, rBase, r_scratch, r_src, 0, size); - FreeTemp(cu, r_scratch); + if (!already_generated) { + if (short_form) { + store = NewLIR3(cu, opcode, r_src, rBase, encoded_disp); + } else { + int r_scratch = AllocTemp(cu); + cg->LoadConstant(cu, r_scratch, encoded_disp); + store = cg->StoreBaseIndexed(cu, rBase, r_scratch, r_src, 0, size); + FreeTemp(cu, r_scratch); + } } // TODO: In future, may need to differentiate Dalvik & spill accesses if (rBase == rARM_SP) { AnnotateDalvikRegAccess(cu, store, displacement >> 2, false /* is_load */, is64bit); } - return res; + return store; } LIR* ArmCodegen::StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src, diff --git a/src/compiler/codegen/codegen.h b/src/compiler/codegen/codegen.h index 03ecb43171..901e5da6bb 100644 --- a/src/compiler/codegen/codegen.h +++ b/src/compiler/codegen/codegen.h @@ -236,8 +236,8 @@ class Codegen { int displacement, int r_dest, int r_dest_hi, OpSize size, int s_reg) = 0; virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value) = 0; - virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, - int val_lo, int val_hi) = 0; + virtual LIR* LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, + int64_t value) = 0; virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src, OpSize size) = 0; virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo, @@ -288,6 +288,10 @@ class Codegen { virtual bool IsUnconditionalBranch(LIR* lir) = 0; // Required for target - Dalvik-level generators. + virtual bool GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) = 0; + virtual void GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) = 0; virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) = 0; virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, @@ -349,6 +353,9 @@ class Codegen { RegLocation rl_index, RegLocation rl_dest, int scale) = 0; virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_src, int scale) = 0; + virtual bool GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_shift) = 0; // Required for target - single operation generators. virtual LIR* OpUnconditionalBranch(CompilationUnit* cu, LIR* target) = 0; @@ -381,7 +388,10 @@ class Codegen { virtual void OpRegCopyWide(CompilationUnit* cu, int dest_lo, int dest_hi, int src_lo, int src_hi) = 0; virtual void OpTlsCmp(CompilationUnit* cu, int offset, int val) = 0; - virtual bool InexpensiveConstant(int reg, int value) = 0; + virtual bool InexpensiveConstantInt(int32_t value) = 0; + virtual bool InexpensiveConstantFloat(int32_t value) = 0; + virtual bool InexpensiveConstantLong(int64_t value) = 0; + virtual bool InexpensiveConstantDouble(int64_t value) = 0; // Temp workaround void Workaround7250540(CompilationUnit* cu, RegLocation rl_dest, int value); diff --git a/src/compiler/codegen/codegen_util.cc b/src/compiler/codegen/codegen_util.cc index ad05b93c8d..57d932f6e3 100644 --- a/src/compiler/codegen/codegen_util.cc +++ b/src/compiler/codegen/codegen_util.cc @@ -23,6 +23,27 @@ namespace art { +bool IsInexpensiveConstant(CompilationUnit* cu, RegLocation rl_src) +{ + bool res = false; + if (rl_src.is_const) { + if (rl_src.wide) { + if (rl_src.fp) { + res = cu->cg->InexpensiveConstantDouble(ConstantValueWide(cu, rl_src)); + } else { + res = cu->cg->InexpensiveConstantLong(ConstantValueWide(cu, rl_src)); + } + } else { + if (rl_src.fp) { + res = cu->cg->InexpensiveConstantFloat(ConstantValue(cu, rl_src)); + } else { + res = cu->cg->InexpensiveConstantInt(ConstantValue(cu, rl_src)); + } + } + } + return res; +} + void MarkSafepointPC(CompilationUnit* cu, LIR* inst) { inst->def_mask = ENCODE_ALL; @@ -202,6 +223,9 @@ void DumpLIRInsn(CompilationUnit* cu, LIR* lir, unsigned char* base_addr) LOG(INFO) << "-------- entry offset: 0x" << std::hex << dest; break; case kPseudoDalvikByteCodeBoundary: + if (lir->operands[0] == 0) { + lir->operands[0] = reinterpret_cast<uintptr_t>("No instruction string"); + } LOG(INFO) << "-------- dalvik offset: 0x" << std::hex << lir->dalvik_offset << " @ " << reinterpret_cast<char*>(lir->operands[0]); break; @@ -471,6 +495,8 @@ LIR* ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi) LIR* lo_target = NULL; while (data_target) { if (lo_match && (data_target->operands[0] == val_hi)) { + // Record high word in case we need to expand this later. + lo_target->operands[1] = val_hi; return lo_target; } lo_match = false; @@ -488,7 +514,7 @@ LIR* ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi) * instruction streams. */ -/* Add a 32-bit constant either in the constant pool */ +/* Add a 32-bit constant to the constant pool */ LIR* AddWordData(CompilationUnit* cu, LIR* *constant_list_p, int value) { /* Add the constant to the literal pool */ @@ -1097,4 +1123,21 @@ bool EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) return is_taken; } +// Convert relation of src1/src2 to src2/src1 +ConditionCode FlipComparisonOrder(ConditionCode before) { + ConditionCode res; + switch (before) { + case kCondEq: res = kCondEq; break; + case kCondNe: res = kCondNe; break; + case kCondLt: res = kCondGt; break; + case kCondGt: res = kCondLt; break; + case kCondLe: res = kCondGe; break; + case kCondGe: res = kCondLe; break; + default: + res = static_cast<ConditionCode>(0); + LOG(FATAL) << "Unexpected ccode " << before; + } + return res; +} + } // namespace art diff --git a/src/compiler/codegen/codegen_util.h b/src/compiler/codegen/codegen_util.h index 4f146560b2..9b9bece585 100644 --- a/src/compiler/codegen/codegen_util.h +++ b/src/compiler/codegen/codegen_util.h @@ -20,6 +20,7 @@ #include <stdint.h> #include "compiler/compiler_enums.h" +#include "compiler/compiler_ir.h" namespace art { @@ -59,6 +60,8 @@ void DumpPackedSwitchTable(const uint16_t* table); LIR* MarkBoundary(CompilationUnit* cu, int offset, const char* inst_str); void NopLIR(LIR* lir); bool EvaluateBranch(Instruction::Code opcode, int src1, int src2); +bool IsInexpensiveConstant(CompilationUnit* cu, RegLocation rl_src); +ConditionCode FlipComparisonOrder(ConditionCode before); } // namespace art diff --git a/src/compiler/codegen/gen_common.cc b/src/compiler/codegen/gen_common.cc index 1d64a71848..a4c8d0cf74 100644 --- a/src/compiler/codegen/gen_common.cc +++ b/src/compiler/codegen/gen_common.cc @@ -55,7 +55,7 @@ LIR* Codegen::GenImmedCheck(CompilationUnit* cu, ConditionCode c_code, int reg, ThrowKind kind) { LIR* tgt = RawLIR(cu, 0, kPseudoThrowTarget, kind, - cu->current_dalvik_offset); + cu->current_dalvik_offset, reg, imm_val); LIR* branch; if (c_code == kCondAl) { branch = OpUnconditionalBranch(cu, tgt); @@ -89,23 +89,6 @@ LIR* Codegen::GenRegRegCheck(CompilationUnit* cu, ConditionCode c_code, int reg1 return branch; } -// Convert relation of src1/src2 to src2/src1 -ConditionCode FlipComparisonOrder(ConditionCode before) { - ConditionCode res; - switch (before) { - case kCondEq: res = kCondEq; break; - case kCondNe: res = kCondNe; break; - case kCondLt: res = kCondGt; break; - case kCondGt: res = kCondLt; break; - case kCondLe: res = kCondGe; break; - case kCondGe: res = kCondLe; break; - default: - res = static_cast<ConditionCode>(0); - LOG(FATAL) << "Unexpected ccode " << before; - } - return res; -} - void Codegen::GenCompareAndBranch(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_src1, RegLocation rl_src2, LIR* taken, LIR* fall_through) @@ -146,12 +129,12 @@ void Codegen::GenCompareAndBranch(CompilationUnit* cu, Instruction::Code opcode, rl_src1 = LoadValue(cu, rl_src1, kCoreReg); // Is this really an immediate comparison? if (rl_src2.is_const) { - int immval = cu->constant_values[rl_src2.orig_sreg]; // If it's already live in a register or not easily materialized, just keep going RegLocation rl_temp = UpdateLoc(cu, rl_src2); - if ((rl_temp.location == kLocDalvikFrame) && InexpensiveConstant(rl_src1.low_reg, immval)) { + if ((rl_temp.location == kLocDalvikFrame) && + InexpensiveConstantInt(ConstantValue(cu, rl_src2))) { // OK - convert this to a compare immediate and branch - OpCmpImmBranch(cu, cond, rl_src1.low_reg, immval, taken); + OpCmpImmBranch(cu, cond, rl_src1.low_reg, ConstantValue(cu, rl_src2), taken); OpUnconditionalBranch(cu, fall_through); return; } @@ -614,6 +597,18 @@ void Codegen::HandleThrowLaunchPads(CompilationUnit *cu) case kThrowNullPointer: func_offset = ENTRYPOINT_OFFSET(pThrowNullPointerFromCode); break; + case kThrowConstantArrayBounds: // v1 is length reg (for Arm/Mips), v2 constant index + // v1 holds the constant array index. Mips/Arm uses v2 for length, x86 reloads. + if (target_x86) { + OpRegMem(cu, kOpMov, TargetReg(kArg1), v1, mirror::Array::LengthOffset().Int32Value()); + } else { + OpRegCopy(cu, TargetReg(kArg1), v1); + } + // Make sure the following LoadConstant doesn't mess with kArg1. + LockTemp(cu, TargetReg(kArg1)); + LoadConstant(cu, TargetReg(kArg0), v2); + func_offset = ENTRYPOINT_OFFSET(pThrowArrayBoundsFromCode); + break; case kThrowArrayBounds: // Move v1 (array index) to kArg0 and v2 (array length) to kArg1 if (v2 != TargetReg(kArg0)) { @@ -1602,9 +1597,14 @@ bool Codegen::GenArithOpLong(CompilationUnit* cu, Instruction::Code opcode, RegL break; case Instruction::MUL_LONG: case Instruction::MUL_LONG_2ADDR: - call_out = true; - ret_reg = TargetReg(kRet0); - func_offset = ENTRYPOINT_OFFSET(pLmul); + if (cu->instruction_set == kThumb2) { + GenMulLong(cu, rl_dest, rl_src1, rl_src2); + return false; + } else { + call_out = true; + ret_reg = TargetReg(kRet0); + func_offset = ENTRYPOINT_OFFSET(pLmul); + } break; case Instruction::DIV_LONG: case Instruction::DIV_LONG_2ADDR: diff --git a/src/compiler/codegen/gen_loadstore.cc b/src/compiler/codegen/gen_loadstore.cc index b183f9e245..c8f9c51ffd 100644 --- a/src/compiler/codegen/gen_loadstore.cc +++ b/src/compiler/codegen/gen_loadstore.cc @@ -59,12 +59,20 @@ void Codegen::Workaround7250540(CompilationUnit* cu, RegLocation rl_dest, int ze return; } } + int temp_reg = zero_reg; + if (temp_reg == INVALID_REG) { + temp_reg = AllocTemp(cu); + cu->cg->LoadConstant(cu, temp_reg, 0); + } if (cu->promotion_map[pmap_index].core_location == kLocPhysReg) { // Promoted - just copy in a zero - OpRegCopy(cu, cu->promotion_map[pmap_index].core_reg, zero_reg); + OpRegCopy(cu, cu->promotion_map[pmap_index].core_reg, temp_reg); } else { // Lives in the frame, need to store. - StoreBaseDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_dest.s_reg_low), zero_reg, kWord); + StoreBaseDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_dest.s_reg_low), temp_reg, kWord); + } + if (zero_reg == INVALID_REG) { + FreeTemp(cu, temp_reg); } } } @@ -92,14 +100,12 @@ void Codegen::LoadValueDirect(CompilationUnit* cu, RegLocation rl_src, int r_des rl_src = UpdateLoc(cu, rl_src); if (rl_src.location == kLocPhysReg) { OpRegCopy(cu, r_dest, rl_src.low_reg); + } else if (IsInexpensiveConstant(cu, rl_src)) { + LoadConstantNoClobber(cu, r_dest, ConstantValue(cu, rl_src)); } else { DCHECK((rl_src.location == kLocDalvikFrame) || (rl_src.location == kLocCompilerTemp)); - if (rl_src.is_const && InexpensiveConstant(r_dest, cu->constant_values[rl_src.orig_sreg])) { - LoadConstantNoClobber(cu, r_dest, cu->constant_values[rl_src.orig_sreg]); - } else { - LoadWordDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_src.s_reg_low), r_dest); - } + LoadWordDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_src.s_reg_low), r_dest); } } @@ -126,6 +132,8 @@ void Codegen::LoadValueDirectWide(CompilationUnit* cu, RegLocation rl_src, int r rl_src = UpdateLocWide(cu, rl_src); if (rl_src.location == kLocPhysReg) { OpRegCopyWide(cu, reg_lo, reg_hi, rl_src.low_reg, rl_src.high_reg); + } else if (IsInexpensiveConstant(cu, rl_src)) { + LoadConstantWide(cu, reg_lo, reg_hi, ConstantValueWide(cu, rl_src)); } else { DCHECK((rl_src.location == kLocDalvikFrame) || (rl_src.location == kLocCompilerTemp)); @@ -152,9 +160,7 @@ void Codegen::LoadValueDirectWideFixed(CompilationUnit* cu, RegLocation rl_src, RegLocation Codegen::LoadValue(CompilationUnit* cu, RegLocation rl_src, RegisterClass op_kind) { rl_src = EvalLoc(cu, rl_src, op_kind, false); - if (rl_src.location != kLocPhysReg) { - DCHECK((rl_src.location == kLocDalvikFrame) || - (rl_src.location == kLocCompilerTemp)); + if (IsInexpensiveConstant(cu, rl_src) || rl_src.location != kLocPhysReg) { LoadValueDirect(cu, rl_src, rl_src.low_reg); rl_src.location = kLocPhysReg; MarkLive(cu, rl_src.low_reg, rl_src.s_reg_low); @@ -222,14 +228,11 @@ RegLocation Codegen::LoadValueWide(CompilationUnit* cu, RegLocation rl_src, Regi { DCHECK(rl_src.wide); rl_src = EvalLoc(cu, rl_src, op_kind, false); - if (rl_src.location != kLocPhysReg) { - DCHECK((rl_src.location == kLocDalvikFrame) || - (rl_src.location == kLocCompilerTemp)); + if (IsInexpensiveConstant(cu, rl_src) || rl_src.location != kLocPhysReg) { LoadValueDirectWide(cu, rl_src, rl_src.low_reg, rl_src.high_reg); rl_src.location = kLocPhysReg; MarkLive(cu, rl_src.low_reg, rl_src.s_reg_low); - MarkLive(cu, rl_src.high_reg, - GetSRegHi(rl_src.s_reg_low)); + MarkLive(cu, rl_src.high_reg, GetSRegHi(rl_src.s_reg_low)); } return rl_src; } diff --git a/src/compiler/codegen/local_optimizations.cc b/src/compiler/codegen/local_optimizations.cc index b6981cac03..2b86421bc4 100644 --- a/src/compiler/codegen/local_optimizations.cc +++ b/src/compiler/codegen/local_optimizations.cc @@ -81,13 +81,20 @@ static void ApplyLoadStoreElimination(CompilationUnit* cu, LIR* head_lir, LIR* t if (head_lir == tail_lir) return; for (this_lir = PREV_LIR(tail_lir); this_lir != head_lir; this_lir = PREV_LIR(this_lir)) { + + if (is_pseudo_opcode(this_lir->opcode)) continue; + int sink_distance = 0; + uint64_t target_flags = cg->GetTargetInstFlags(this_lir->opcode); + /* Skip non-interesting instructions */ if ((this_lir->flags.is_nop == true) || - is_pseudo_opcode(this_lir->opcode) || - (cg->GetTargetInstFlags(this_lir->opcode) & IS_BRANCH) || - !(cg->GetTargetInstFlags(this_lir->opcode) & (IS_LOAD | IS_STORE))) { + (target_flags & IS_BRANCH) || + ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) || // Skip wide loads. + ((target_flags & (REG_USE0 | REG_USE1 | REG_USE2)) == + (REG_USE0 | REG_USE1 | REG_USE2)) || // Skip wide stores. + !(target_flags & (IS_LOAD | IS_STORE))) { continue; } @@ -130,7 +137,7 @@ static void ApplyLoadStoreElimination(CompilationUnit* cu, LIR* head_lir, LIR* t * Skip already dead instructions (whose dataflow information is * outdated and misleading). */ - if (check_lir->flags.is_nop) continue; + if (check_lir->flags.is_nop || is_pseudo_opcode(check_lir->opcode)) continue; uint64_t check_mem_mask = (check_lir->use_mask | check_lir->def_mask) & ENCODE_MEM; uint64_t alias_condition = this_mem_mask & check_mem_mask; @@ -139,14 +146,18 @@ static void ApplyLoadStoreElimination(CompilationUnit* cu, LIR* head_lir, LIR* t /* * Potential aliases seen - check the alias relations */ - if (check_mem_mask != ENCODE_MEM && alias_condition != 0) { - bool is_check_lir_load = cg->GetTargetInstFlags(check_lir->opcode) & IS_LOAD; + uint64_t check_flags = cg->GetTargetInstFlags(check_lir->opcode); + // TUNING: Support instructions with multiple register targets. + if ((check_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) { + stop_here = true; + } else if (check_mem_mask != ENCODE_MEM && alias_condition != 0) { + bool is_check_lir_load = check_flags & IS_LOAD; if (alias_condition == ENCODE_LITERAL) { /* * Should only see literal loads in the instruction * stream. */ - DCHECK(!(cg->GetTargetInstFlags(check_lir->opcode) & IS_STORE)); + DCHECK(!(check_flags & IS_STORE)); /* Same value && same register type */ if (check_lir->alias_info == this_lir->alias_info && cg->SameRegType(check_lir->operands[0], native_reg_id)) { @@ -276,10 +287,13 @@ void ApplyLoadHoisting(CompilationUnit* cu, LIR* head_lir, LIR* tail_lir) /* Start from the second instruction */ for (this_lir = NEXT_LIR(head_lir); this_lir != tail_lir; this_lir = NEXT_LIR(this_lir)) { + if (is_pseudo_opcode(this_lir->opcode)) continue; + + uint64_t target_flags = cg->GetTargetInstFlags(this_lir->opcode); /* Skip non-interesting instructions */ if ((this_lir->flags.is_nop == true) || - is_pseudo_opcode(this_lir->opcode) || - !(cg->GetTargetInstFlags(this_lir->opcode) & IS_LOAD)) { + ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) || + !(target_flags & IS_LOAD)) { continue; } diff --git a/src/compiler/codegen/mips/codegen_mips.h b/src/compiler/codegen/mips/codegen_mips.h index 705ecfa393..a4d44d5c61 100644 --- a/src/compiler/codegen/mips/codegen_mips.h +++ b/src/compiler/codegen/mips/codegen_mips.h @@ -38,8 +38,7 @@ class MipsCodegen : public Codegen { int displacement, int r_dest, int r_dest_hi, OpSize size, int s_reg); virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value); - virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, - int val_lo, int val_hi); + virtual LIR* LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value); virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src, OpSize size); virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo, @@ -90,12 +89,18 @@ class MipsCodegen : public Codegen { virtual bool IsUnconditionalBranch(LIR* lir); // Required for target - Dalvik-level generators. + virtual bool GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array, RegLocation rl_index, RegLocation rl_src, int scale); virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale); virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_src, int scale); + virtual bool GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift); + virtual void GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, @@ -191,7 +196,10 @@ class MipsCodegen : public Codegen { void SpillCoreRegs(CompilationUnit* cu); void UnSpillCoreRegs(CompilationUnit* cu); static const MipsEncodingMap EncodingMap[kMipsLast]; - bool InexpensiveConstant(int reg, int value); + bool InexpensiveConstantInt(int32_t value); + bool InexpensiveConstantFloat(int32_t value); + bool InexpensiveConstantLong(int64_t value); + bool InexpensiveConstantDouble(int64_t value); }; } // namespace art diff --git a/src/compiler/codegen/mips/int_mips.cc b/src/compiler/codegen/mips/int_mips.cc index 7da4cf684b..675cf8d86a 100644 --- a/src/compiler/codegen/mips/int_mips.cc +++ b/src/compiler/codegen/mips/int_mips.cc @@ -341,6 +341,13 @@ LIR* MipsCodegen::OpIT(CompilationUnit* cu, ConditionCode cond, const char* guid return NULL; } +void MipsCodegen::GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + LOG(FATAL) << "Unexpected use of GenMulLong for Mips"; + return; +} + bool MipsCodegen::GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { @@ -635,4 +642,18 @@ void MipsCodegen::GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation MarkGCCard(cu, r_value, r_array); } +bool MipsCodegen::GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift) +{ + // Default implementation is just to ignore the constant case. + return GenShiftOpLong(cu, opcode, rl_dest, rl_src1, rl_shift); +} + +bool MipsCodegen::GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) +{ + // Default - bail to non-const handler. + return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2); +} + } // namespace art diff --git a/src/compiler/codegen/mips/utility_mips.cc b/src/compiler/codegen/mips/utility_mips.cc index 1e217fbe75..12d054cf3c 100644 --- a/src/compiler/codegen/mips/utility_mips.cc +++ b/src/compiler/codegen/mips/utility_mips.cc @@ -52,17 +52,24 @@ LIR* MipsCodegen::OpFpRegCopy(CompilationUnit *cu, int r_dest, int r_src) return res; } -bool MipsCodegen::InexpensiveConstant(int reg, int value) +bool MipsCodegen::InexpensiveConstantInt(int32_t value) { - bool res = false; - if (value == 0) { - res = true; - } else if (IsUint(16, value)) { - res = true; - } else if ((value < 0) && (value >= -32768)) { - res = true; - } - return res; + return ((value == 0) || IsUint(16, value) || ((value < 0) && (value >= -32768))); +} + +bool MipsCodegen::InexpensiveConstantFloat(int32_t value) +{ + return false; // TUNING +} + +bool MipsCodegen::InexpensiveConstantLong(int64_t value) +{ + return false; // TUNING +} + +bool MipsCodegen::InexpensiveConstantDouble(int64_t value) +{ + return false; // TUNING } /* @@ -336,12 +343,11 @@ LIR* MipsCodegen::OpRegReg(CompilationUnit *cu, OpKind op, int r_dest_src1, int return NewLIR2(cu, opcode, r_dest_src1, r_src2); } -LIR* MipsCodegen::LoadConstantValueWide(CompilationUnit *cu, int r_dest_lo, int r_dest_hi, - int val_lo, int val_hi) +LIR* MipsCodegen::LoadConstantWide(CompilationUnit *cu, int r_dest_lo, int r_dest_hi, int64_t value) { LIR *res; - res = LoadConstantNoClobber(cu, r_dest_lo, val_lo); - LoadConstantNoClobber(cu, r_dest_hi, val_hi); + res = LoadConstantNoClobber(cu, r_dest_lo, Low32Bits(value)); + LoadConstantNoClobber(cu, r_dest_hi, High32Bits(value)); return res; } diff --git a/src/compiler/codegen/mir_to_gbc.cc b/src/compiler/codegen/mir_to_gbc.cc index f67f760d40..ba90269987 100644 --- a/src/compiler/codegen/mir_to_gbc.cc +++ b/src/compiler/codegen/mir_to_gbc.cc @@ -1018,7 +1018,7 @@ static bool ConvertMIRNode(CompilationUnit* cu, MIR* mir, BasicBlock* bb, } EmitPopShadowFrame(cu); cu->irb->CreateRet(GetLLVMValue(cu, rl_src[0].orig_sreg)); - bb->has_return = true; + DCHECK(bb->has_return); } break; @@ -1028,7 +1028,7 @@ static bool ConvertMIRNode(CompilationUnit* cu, MIR* mir, BasicBlock* bb, } EmitPopShadowFrame(cu); cu->irb->CreateRetVoid(); - bb->has_return = true; + DCHECK(bb->has_return); } break; @@ -2572,8 +2572,7 @@ static void CvtConst(CompilationUnit* cu, llvm::CallInst* call_inst) RegLocation rl_dest = GetLoc(cu, call_inst); RegLocation rl_result = EvalLoc(cu, rl_dest, kAnyReg, true); if (rl_dest.wide) { - cg->LoadConstantValueWide(cu, rl_result.low_reg, rl_result.high_reg, - (immval) & 0xffffffff, (immval >> 32) & 0xffffffff); + cg->LoadConstantWide(cu, rl_result.low_reg, rl_result.high_reg, immval); cg->StoreValueWide(cu, rl_dest, rl_result); } else { int immediate = immval & 0xffffffff; diff --git a/src/compiler/codegen/mir_to_lir.cc b/src/compiler/codegen/mir_to_lir.cc index bd26f2d64e..96de65e644 100644 --- a/src/compiler/codegen/mir_to_lir.cc +++ b/src/compiler/codegen/mir_to_lir.cc @@ -164,23 +164,21 @@ static bool CompileDalvikInstruction(CompilationUnit* cu, MIR* mir, BasicBlock* case Instruction::CONST_WIDE_16: case Instruction::CONST_WIDE_32: rl_result = EvalLoc(cu, rl_dest, kAnyReg, true); - cg->LoadConstantValueWide(cu, rl_result.low_reg, rl_result.high_reg, vB, - (vB & 0x80000000) ? -1 : 0); + cg->LoadConstantWide(cu, rl_result.low_reg, rl_result.high_reg, + static_cast<int64_t>(static_cast<int32_t>(vB))); cg->StoreValueWide(cu, rl_dest, rl_result); break; case Instruction::CONST_WIDE: rl_result = EvalLoc(cu, rl_dest, kAnyReg, true); - cg->LoadConstantValueWide(cu, rl_result.low_reg, rl_result.high_reg, - mir->dalvikInsn.vB_wide & 0xffffffff, - (mir->dalvikInsn.vB_wide >> 32) & 0xffffffff); + cg->LoadConstantWide(cu, rl_result.low_reg, rl_result.high_reg, mir->dalvikInsn.vB_wide); cg->StoreValueWide(cu, rl_dest, rl_result); break; case Instruction::CONST_WIDE_HIGH16: rl_result = EvalLoc(cu, rl_dest, kAnyReg, true); - cg->LoadConstantValueWide(cu, rl_result.low_reg, rl_result.high_reg, - 0, vB << 16); + cg->LoadConstantWide(cu, rl_result.low_reg, rl_result.high_reg, + static_cast<int64_t>(vB) << 48); cg->StoreValueWide(cu, rl_dest, rl_result); break; @@ -543,11 +541,11 @@ static bool CompileDalvikInstruction(CompilationUnit* cu, MIR* mir, BasicBlock* case Instruction::XOR_INT: case Instruction::XOR_INT_2ADDR: if (rl_src[0].is_const && - cu->cg->InexpensiveConstant(0, cu->constant_values[rl_src[0].orig_sreg])) { + cu->cg->InexpensiveConstantInt(ConstantValue(cu, rl_src[0]))) { cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[1], cu->constant_values[rl_src[0].orig_sreg]); } else if (rl_src[1].is_const && - cu->cg->InexpensiveConstant(0, cu->constant_values[rl_src[1].orig_sreg])) { + cu->cg->InexpensiveConstantInt(ConstantValue(cu, rl_src[1]))) { cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[0], cu->constant_values[rl_src[1].orig_sreg]); } else { @@ -568,9 +566,8 @@ static bool CompileDalvikInstruction(CompilationUnit* cu, MIR* mir, BasicBlock* case Instruction::USHR_INT: case Instruction::USHR_INT_2ADDR: if (rl_src[1].is_const && - cu->cg->InexpensiveConstant(0, cu->constant_values[rl_src[1].orig_sreg])) { - cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[0], - cu->constant_values[rl_src[1].orig_sreg]); + cu->cg->InexpensiveConstantInt(ConstantValue(cu, rl_src[1]))) { + cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[0], ConstantValue(cu, rl_src[1])); } else { cg->GenArithOpInt(cu, opcode, rl_dest, rl_src[0], rl_src[1]); } @@ -578,20 +575,26 @@ static bool CompileDalvikInstruction(CompilationUnit* cu, MIR* mir, BasicBlock* case Instruction::ADD_LONG: case Instruction::SUB_LONG: - case Instruction::MUL_LONG: - case Instruction::DIV_LONG: - case Instruction::REM_LONG: case Instruction::AND_LONG: case Instruction::OR_LONG: case Instruction::XOR_LONG: case Instruction::ADD_LONG_2ADDR: case Instruction::SUB_LONG_2ADDR: - case Instruction::MUL_LONG_2ADDR: - case Instruction::DIV_LONG_2ADDR: - case Instruction::REM_LONG_2ADDR: case Instruction::AND_LONG_2ADDR: case Instruction::OR_LONG_2ADDR: case Instruction::XOR_LONG_2ADDR: + if (rl_src[0].is_const || rl_src[1].is_const) { + cg->GenArithImmOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]); + break; + } + // Note: intentional fallthrough. + + case Instruction::MUL_LONG: + case Instruction::DIV_LONG: + case Instruction::REM_LONG: + case Instruction::MUL_LONG_2ADDR: + case Instruction::DIV_LONG_2ADDR: + case Instruction::REM_LONG_2ADDR: cg->GenArithOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]); break; @@ -601,7 +604,11 @@ static bool CompileDalvikInstruction(CompilationUnit* cu, MIR* mir, BasicBlock* case Instruction::SHL_LONG_2ADDR: case Instruction::SHR_LONG_2ADDR: case Instruction::USHR_LONG_2ADDR: - cg->GenShiftOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]); + if (rl_src[1].is_const) { + cg->GenShiftImmOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]); + } else { + cg->GenShiftOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]); + } break; case Instruction::ADD_FLOAT: diff --git a/src/compiler/codegen/ralloc_util.cc b/src/compiler/codegen/ralloc_util.cc index afd49768d0..1d5f3aca86 100644 --- a/src/compiler/codegen/ralloc_util.cc +++ b/src/compiler/codegen/ralloc_util.cc @@ -64,7 +64,7 @@ void CompilerInitPool(RegisterInfo* regs, int* reg_nums, int num) } } -static void DumpRegPool(RegisterInfo* p, int num_regs) +void DumpRegPool(RegisterInfo* p, int num_regs) { LOG(INFO) << "================================================"; for (int i = 0; i < num_regs; i++) { @@ -1091,21 +1091,14 @@ static void CountRefs(CompilationUnit *cu, BasicBlock* bb, RefCounts* core_count RegLocation loc = cu->reg_location[i]; RefCounts* counts = loc.fp ? fp_counts : core_counts; int p_map_idx = SRegToPMap(cu, loc.s_reg_low); - int sample_reg = loc.fp ? cu->reg_pool->FPRegs[0].reg : cu->reg_pool->core_regs[0].reg; - bool simple_immediate = loc.is_const && - !cu->cg->InexpensiveConstant(sample_reg, cu->constant_values[loc.orig_sreg]); - if (loc.defined) { - // Don't count easily regenerated immediates - if (!simple_immediate) { - counts[p_map_idx].count += cu->use_counts.elem_list[i]; - } + //Don't count easily regenerated immediates + if (loc.fp || loc.wide || !IsInexpensiveConstant(cu, loc)) { + counts[p_map_idx].count += cu->use_counts.elem_list[i]; } if (loc.wide) { - if (loc.defined) { - if (loc.fp && !simple_immediate) { - counts[p_map_idx].double_start = true; - counts[p_map_idx+1].count += cu->use_counts.elem_list[i+1]; - } + if (loc.fp) { + counts[p_map_idx].double_start = true; + counts[p_map_idx+1].count += cu->use_counts.elem_list[i+1]; } i += 2; } else { diff --git a/src/compiler/codegen/ralloc_util.h b/src/compiler/codegen/ralloc_util.h index a5ed9999da..67c22b56e2 100644 --- a/src/compiler/codegen/ralloc_util.h +++ b/src/compiler/codegen/ralloc_util.h @@ -157,6 +157,7 @@ void RecordCorePromotion(CompilationUnit* cu, int reg, int s_reg); void RecordFpPromotion(CompilationUnit* cu, int reg, int s_reg); int ComputeFrameSize(CompilationUnit* cu); int SRegToPMap(CompilationUnit* cu, int s_reg); +void DumpRegPool(RegisterInfo* p, int num_regs); } // namespace art diff --git a/src/compiler/codegen/x86/codegen_x86.h b/src/compiler/codegen/x86/codegen_x86.h index 141638cf1a..9cc17f130e 100644 --- a/src/compiler/codegen/x86/codegen_x86.h +++ b/src/compiler/codegen/x86/codegen_x86.h @@ -38,8 +38,7 @@ class X86Codegen : public Codegen { int displacement, int r_dest, int r_dest_hi, OpSize size, int s_reg); virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value); - virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, - int val_lo, int val_hi); + virtual LIR* LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value); virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src, OpSize size); virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo, @@ -90,12 +89,18 @@ class X86Codegen : public Codegen { virtual bool IsUnconditionalBranch(LIR* lir); // Required for target - Dalvik-level generators. + virtual bool GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array, RegLocation rl_index, RegLocation rl_src, int scale); virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale); virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_src, int scale); + virtual bool GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift); + virtual void GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, @@ -188,7 +193,10 @@ class X86Codegen : public Codegen { void SpillCoreRegs(CompilationUnit* cu); void UnSpillCoreRegs(CompilationUnit* cu); static const X86EncodingMap EncodingMap[kX86Last]; - bool InexpensiveConstant(int reg, int value); + bool InexpensiveConstantInt(int32_t value); + bool InexpensiveConstantFloat(int32_t value); + bool InexpensiveConstantLong(int64_t value); + bool InexpensiveConstantDouble(int64_t value); }; } // namespace art diff --git a/src/compiler/codegen/x86/int_x86.cc b/src/compiler/codegen/x86/int_x86.cc index 0ae51e0e23..d4a34f7d37 100644 --- a/src/compiler/codegen/x86/int_x86.cc +++ b/src/compiler/codegen/x86/int_x86.cc @@ -322,6 +322,13 @@ LIR* X86Codegen::OpIT(CompilationUnit* cu, ConditionCode cond, const char* guide LOG(FATAL) << "Unexpected use of OpIT in x86"; return NULL; } + +void X86Codegen::GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + LOG(FATAL) << "Unexpected use of GenX86Long for x86"; + return; +} bool X86Codegen::GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { @@ -583,4 +590,18 @@ void X86Codegen::GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation MarkGCCard(cu, r_value, r_array); } +bool X86Codegen::GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift) +{ + // Default implementation is just to ignore the constant case. + return GenShiftOpLong(cu, opcode, rl_dest, rl_src1, rl_shift); +} + +bool X86Codegen::GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) +{ + // Default - bail to non-const handler. + return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2); +} + } // namespace art diff --git a/src/compiler/codegen/x86/utility_x86.cc b/src/compiler/codegen/x86/utility_x86.cc index 4f9e28b444..4cc2c182cb 100644 --- a/src/compiler/codegen/x86/utility_x86.cc +++ b/src/compiler/codegen/x86/utility_x86.cc @@ -50,11 +50,26 @@ LIR* X86Codegen::OpFpRegCopy(CompilationUnit *cu, int r_dest, int r_src) return res; } -bool X86Codegen::InexpensiveConstant(int reg, int value) +bool X86Codegen::InexpensiveConstantInt(int32_t value) { return true; } +bool X86Codegen::InexpensiveConstantFloat(int32_t value) +{ + return false; +} + +bool X86Codegen::InexpensiveConstantLong(int64_t value) +{ + return true; +} + +bool X86Codegen::InexpensiveConstantDouble(int64_t value) +{ + return false; // TUNING +} + /* * Load a immediate using a shortcut if possible; otherwise * grab from the per-translation literal pool. If target is @@ -316,13 +331,14 @@ LIR* X86Codegen::OpMem(CompilationUnit* cu, OpKind op, int rBase, int disp) return NewLIR2(cu, opcode, rBase, disp); } -LIR* X86Codegen::LoadConstantValueWide(CompilationUnit *cu, int r_dest_lo, - int r_dest_hi, int val_lo, int val_hi) +LIR* X86Codegen::LoadConstantWide(CompilationUnit *cu, int r_dest_lo, int r_dest_hi, int64_t value) { + int32_t val_lo = Low32Bits(value); + int32_t val_hi = High32Bits(value); LIR *res; if (X86_FPREG(r_dest_lo)) { DCHECK(X86_FPREG(r_dest_hi)); // ignore r_dest_hi - if (val_lo == 0 && val_hi == 0) { + if (value == 0) { return NewLIR2(cu, kX86XorpsRR, r_dest_lo, r_dest_lo); } else { if (val_lo == 0) { |