| field     | value |
|-----------|-------|
| author    | 2012-12-14 13:35:28 -0800 |
| committer | 2013-02-06 12:16:33 -0800 |
| commit    | 4ef3e45d7c6ec3c482a1a48f4df470811aa3cf0a |
| tree      | 5d91ff23708048a2b453a5917dab82be1b545f91 /src/compiler/codegen/arm |
| parent    | f84f99fe3a944310fdfb6f17836079ac5c48c799 |
Compiler constant handling rework
In preparation for de-optimization, reworked the constant
handling mechanism. Also took advantage of knowledge of
constant operands (particularly for long operations).
Significant performance improvements for Mandelbrot
(~60 seconds to ~34 seconds). Minor improvements in other
benchmarks.
The new constant handling breaks two of the existing
optimization passes: "Skip Large Method" and "Load/Store
Elimination."
I don't intend to update the large method optimization
because it will be superseded by the upcoming interpreter/
fingerprinting mechanism. Leaving the code in place for
now in order to compare compile-time improvements with
fingerprinting/interpretation. All related code will be deleted
when that is complete.
The load/store elimination pass needs some rework to handle
uses of multiple-register loads and stores. It will be
updated and restored in a future CL.
Change-Id: Ia979abaf51b8ae81bbb0428031cbcea854625fac
Diffstat (limited to 'src/compiler/codegen/arm')
| mode | file | lines changed |
|------|------|---------------|
| -rw-r--r-- | src/compiler/codegen/arm/arm_lir.h | 5 |
| -rw-r--r-- | src/compiler/codegen/arm/assemble_arm.cc | 56 |
| -rw-r--r-- | src/compiler/codegen/arm/codegen_arm.h | 18 |
| -rw-r--r-- | src/compiler/codegen/arm/int_arm.cc | 398 |
| -rw-r--r-- | src/compiler/codegen/arm/utility_arm.cc | 243 |
5 files changed, 556 insertions, 164 deletions
diff --git a/src/compiler/codegen/arm/arm_lir.h b/src/compiler/codegen/arm/arm_lir.h
index 3fc87924cf..c41f53bf9a 100644
--- a/src/compiler/codegen/arm/arm_lir.h
+++ b/src/compiler/codegen/arm/arm_lir.h
@@ -371,7 +371,7 @@ enum ArmOpcode {
   kThumb2StrbRRI12,  // strb rt,[rn,#imm12] [111110001000] rt[15..12] rn[19..16] imm12[11..0].
   kThumb2Pop,        // pop [1110100010111101] list[15-0]*/
   kThumb2Push,       // push [1110100100101101] list[15-0]*/
-  kThumb2CmpRI8,     // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
+  kThumb2CmpRI12,    // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
   kThumb2AdcRRR,     // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2AndRRR,     // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2BicRRR,     // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
@@ -445,6 +445,9 @@ enum ArmOpcode {
   kThumb2Pop1,       // t3 encoding of pop.
   kThumb2RsubRRR,    // rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2Smull,      // smull [111110111000] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
+  kThumb2LdrdPcRel8, // ldrd rt, rt2, pc +-/1024.
+  kThumb2LdrdI8,     // ldrd rt, rt2, [rn +-/1024].
+  kThumb2StrdI8,     // strd rt, rt2, [rn +-/1024].
   kArmLast,
 };
diff --git a/src/compiler/codegen/arm/assemble_arm.cc b/src/compiler/codegen/arm/assemble_arm.cc
index 91f25d68e6..455ea67577 100644
--- a/src/compiler/codegen/arm/assemble_arm.cc
+++ b/src/compiler/codegen/arm/assemble_arm.cc
@@ -646,7 +646,7 @@ const ArmEncodingMap ArmCodegen::EncodingMap[kArmLast] = {
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 | IS_STORE | NEEDS_FIXUP,
                  "push", "<!0R>", 4),
-    ENCODING_MAP(kThumb2CmpRI8, 0xf1b00f00,
+    ENCODING_MAP(kThumb2CmpRI12, 0xf1b00f00,
                  kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_USE0 | SETS_CCODES,
@@ -917,8 +917,8 @@ const ArmEncodingMap ArmCodegen::EncodingMap[kArmLast] = {
                  "b", "!0t", 4),
     ENCODING_MAP(kThumb2MovImm16H, 0xf2c00000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "movh", "!0C, #!1M", 4),
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0,
+                 "movt", "!0C, #!1M", 4),
     ENCODING_MAP(kThumb2AddPCR, 0x4487,
                  kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
@@ -936,8 +936,8 @@ const ArmEncodingMap ArmCodegen::EncodingMap[kArmLast] = {
                  "mov", "!0C, #!1M", 4),
     ENCODING_MAP(kThumb2MovImm16HST, 0xf2c00000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
-                 "movh", "!0C, #!1M", 4),
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP,
+                 "movt", "!0C, #!1M", 4),
     ENCODING_MAP(kThumb2LdmiaWB, 0xe8b00000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
@@ -972,7 +972,21 @@ const ArmEncodingMap ArmCodegen::EncodingMap[kArmLast] = {
                  kFmtBitBlt, 3, 0,
                  IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
                  "smull", "!0C, !1C, !2C, !3C", 4),
-
+    ENCODING_MAP(kThumb2LdrdPcRel8, 0xe9df0000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+                 "ldrd", "!0C, !1C, [pc, #!2E]", 4),
+    ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+                 kFmtBitBlt, 7, 0,
+                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD,
+                 "ldrd", "!0C, !1C, [!2C, #!3E]", 4),
+    ENCODING_MAP(kThumb2StrdI8, 0xe9c00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+                 kFmtBitBlt, 7, 0,
+                 IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE,
+                 "strd", "!0C, !1C, [!2C, #!3E]", 4),
 };
 
 /*
@@ -1023,13 +1037,14 @@ AssemblerStatus ArmCodegen::AssembleInstructions(CompilationUnit* cu, uintptr_t
     if (lir->opcode == kThumbLdrPcRel ||
         lir->opcode == kThumb2LdrPcRel12 ||
         lir->opcode == kThumbAddPcRel ||
+        lir->opcode == kThumb2LdrdPcRel8 ||
         ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) ||
         ((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) {
       /*
        * PC-relative loads are mostly used to load immediates
       * that are too large to materialize directly in one shot.
       * However, if the load displacement exceeds the limit,
-      * we revert to a 2-instruction materialization sequence.
+      * we revert to a multiple-instruction materialization sequence.
       */
       LIR *lir_target = lir->target;
       uintptr_t pc = (lir->offset + 4) & ~3;
@@ -1044,8 +1059,9 @@ AssemblerStatus ArmCodegen::AssembleInstructions(CompilationUnit* cu, uintptr_t
         // Shouldn't happen in current codegen.
         LOG(FATAL) << "Unexpected pc-rel offset " << delta;
       }
-      // Now, check for the two difficult cases
+      // Now, check for the difficult cases
       if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
+          ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) ||
          ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
          ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
       /*
@@ -1053,26 +1069,34 @@ AssemblerStatus ArmCodegen::AssembleInstructions(CompilationUnit* cu, uintptr_t
        * vldrs/vldrd we include REG_DEF_LR in the resource
        * masks for these instructions.
        */
-        int base_reg = (lir->opcode == kThumb2LdrPcRel12) ?
-          lir->operands[0] : rARM_LR;
+        int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) || (lir->opcode == kThumb2LdrPcRel12))
+            ? lir->operands[0] : rARM_LR;
 
-        // Add new Adr to generate the address
+        // Add new Adr to generate the address.
         LIR* new_adr = RawLIR(cu, lir->dalvik_offset, kThumb2Adr,
                               base_reg, 0, 0, 0, 0, lir->target);
         InsertLIRBefore(lir, new_adr);
 
-        // Convert to normal load
+        // Convert to normal load.
         if (lir->opcode == kThumb2LdrPcRel12) {
           lir->opcode = kThumb2LdrRRI12;
+        } else if (lir->opcode == kThumb2LdrdPcRel8) {
+          lir->opcode = kThumb2LdrdI8;
+        }
+        // Change the load to be relative to the new Adr base.
+        if (lir->opcode == kThumb2LdrdI8) {
+          lir->operands[3] = 0;
+          lir->operands[2] = base_reg;
+        } else {
+          lir->operands[2] = 0;
+          lir->operands[1] = base_reg;
         }
-        // Change the load to be relative to the new Adr base
-        lir->operands[1] = base_reg;
-        lir->operands[2] = 0;
         SetupResourceMasks(cu, lir);
         res = kRetryAll;
       } else {
         if ((lir->opcode == kThumb2Vldrs) ||
-            (lir->opcode == kThumb2Vldrd)) {
+            (lir->opcode == kThumb2Vldrd) ||
+            (lir->opcode == kThumb2LdrdPcRel8)) {
           lir->operands[2] = delta >> 2;
         } else {
           lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?
              delta :
diff --git a/src/compiler/codegen/arm/codegen_arm.h b/src/compiler/codegen/arm/codegen_arm.h
index ea34ff2b26..4dadd6c821 100644
--- a/src/compiler/codegen/arm/codegen_arm.h
+++ b/src/compiler/codegen/arm/codegen_arm.h
@@ -37,8 +37,7 @@ class ArmCodegen : public Codegen {
                                   int displacement, int r_dest, int r_dest_hi, OpSize size,
                                   int s_reg);
     virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value);
-    virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
-                                       int val_lo, int val_hi);
+    virtual LIR* LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value);
     virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
                                OpSize size);
     virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -89,12 +88,18 @@ class ArmCodegen : public Codegen {
     virtual bool IsUnconditionalBranch(LIR* lir);
 
     // Required for target - Dalvik-level generators.
+    virtual bool GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
+                                   RegLocation rl_src1, RegLocation rl_src2);
     virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
                                 RegLocation rl_index, RegLocation rl_src, int scale);
     virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale);
     virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_src, int scale);
+    virtual bool GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift);
+    virtual void GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2);
     virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2);
     virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -197,7 +202,14 @@ class ArmCodegen : public Codegen {
     static int EncodeShift(int code, int amount);
     static int ModifiedImmediate(uint32_t value);
     static ArmConditionCode ArmConditionEncoding(ConditionCode code);
-    bool InexpensiveConstant(int reg, int value);
+    bool InexpensiveConstantInt(int32_t value);
+    bool InexpensiveConstantFloat(int32_t value);
+    bool InexpensiveConstantLong(int64_t value);
+    bool InexpensiveConstantDouble(int64_t value);
+
+  private:
+    void GenFusedLongCmpImmBranch(CompilationUnit* cu, BasicBlock* bb, RegLocation rl_src1,
+                                  int64_t val, ConditionCode ccode);
 };
 
 }  // namespace art
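As context for the new per-type `InexpensiveConstant*` queries above: a 32-bit constant is "inexpensive" for a core register when it, or its complement, fits a Thumb-2 modified immediate, so one mov/mvn-class instruction can materialize it. Here is a rough sketch of that test, not the ART implementation (whose encoder, `ModifiedImmediate`, lives in utility_arm.cc below):

```cpp
#include <cstdint>
#include <cstdio>

// Sketch of the Thumb-2 modified-immediate forms: a plain 8-bit value,
// an 8-bit value replicated across halfwords or bytes, or an 8-bit value
// with its top bit set rotated right by 8..31 bits.
static bool IsModifiedImmediate(uint32_t value) {
  if (value <= 0xff) return true;                               // 0x000000ab
  if (value == ((value & 0xffu) * 0x00010001u)) return true;    // 0x00ab00ab
  if (value == ((value & 0xff00u) * 0x00010001u)) return true;  // 0xab00ab00
  if (value == ((value & 0xffu) * 0x01010101u)) return true;    // 0xabababab
  for (int rot = 8; rot < 32; ++rot) {                          // ror(0b1bcdefgh, rot)
    uint32_t unrotated = (value << rot) | (value >> (32 - rot));
    if (unrotated >= 0x80 && unrotated <= 0xff) return true;
  }
  return false;
}

// Mirrors the shape of InexpensiveConstantInt in this CL.
static bool InexpensiveConstantIntSketch(int32_t value) {
  uint32_t v = static_cast<uint32_t>(value);
  return IsModifiedImmediate(v) || IsModifiedImmediate(~v);
}

int main() {
  printf("0x0000ff00: %d\n", InexpensiveConstantIntSketch(0x0000ff00));  // 1: rotated 8-bit
  printf("0x00012345: %d\n", InexpensiveConstantIntSketch(0x00012345));  // 0: needs movw/movt or pool
  return 0;
}
```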
diff --git a/src/compiler/codegen/arm/int_arm.cc b/src/compiler/codegen/arm/int_arm.cc
index fcf74f1471..5a9786c0d6 100644
--- a/src/compiler/codegen/arm/int_arm.cc
+++ b/src/compiler/codegen/arm/int_arm.cc
@@ -121,16 +121,81 @@ void ArmCodegen::GenCmpLong(CompilationUnit* cu, RegLocation rl_dest, RegLocatio
   branch3->target = branch1->target;
 }
 
-void ArmCodegen::GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir)
+void ArmCodegen::GenFusedLongCmpImmBranch(CompilationUnit* cu, BasicBlock* bb, RegLocation rl_src1,
+                                          int64_t val, ConditionCode ccode)
 {
+  int32_t val_lo = Low32Bits(val);
+  int32_t val_hi = High32Bits(val);
+  DCHECK(ModifiedImmediate(val_lo) >= 0);
+  DCHECK(ModifiedImmediate(val_hi) >= 0);
   LIR* label_list = cu->block_label_list;
   LIR* taken = &label_list[bb->taken->id];
   LIR* not_taken = &label_list[bb->fall_through->id];
   rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg);
+  int32_t low_reg = rl_src1.low_reg;
+  int32_t high_reg = rl_src1.high_reg;
+
+  switch(ccode) {
+    case kCondEq:
+      OpCmpImmBranch(cu, kCondNe, high_reg, val_hi, not_taken);
+      break;
+    case kCondNe:
+      OpCmpImmBranch(cu, kCondNe, high_reg, val_hi, taken);
+      break;
+    case kCondLt:
+      OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, taken);
+      OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, not_taken);
+      ccode = kCondCc;
+      break;
+    case kCondLe:
+      OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, taken);
+      OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, not_taken);
+      ccode = kCondLs;
+      break;
+    case kCondGt:
+      OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, taken);
+      OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, not_taken);
+      ccode = kCondHi;
+      break;
+    case kCondGe:
+      OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, taken);
+      OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, not_taken);
+      ccode = kCondCs;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << ccode;
+  }
+  OpCmpImmBranch(cu, ccode, low_reg, val_lo, taken);
+}
+
+
+void ArmCodegen::GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir)
+{
   RegLocation rl_src1 = GetSrcWide(cu, mir, 0);
   RegLocation rl_src2 = GetSrcWide(cu, mir, 2);
+  // Normalize such that if either operand is constant, src2 will be constant.
+  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  if (rl_src1.is_const) {
+    RegLocation rl_temp = rl_src1;
+    rl_src1 = rl_src2;
+    rl_src2 = rl_temp;
+    ccode = FlipComparisonOrder(ccode);
+  }
+  if (rl_src2.is_const) {
+    RegLocation rl_temp = UpdateLocWide(cu, rl_src2);
+    // Do special compare/branch against simple const operand if not already in registers.
+    int64_t val = ConstantValueWide(cu, rl_src2);
+    if ((rl_temp.location != kLocPhysReg) &&
+        ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
+      GenFusedLongCmpImmBranch(cu, bb, rl_src1, val, ccode);
+      return;
+    }
+  }
+  LIR* label_list = cu->block_label_list;
+  LIR* taken = &label_list[bb->taken->id];
+  LIR* not_taken = &label_list[bb->fall_through->id];
   rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(cu, rl_src2, kCoreReg);
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
   OpRegReg(cu, kOpCmp, rl_src1.high_reg, rl_src2.high_reg);
   switch(ccode) {
     case kCondEq:
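A brief aside on the decomposition above: the fused compare-and-branch against a 64-bit constant compares the high words with signed conditions and, only when they are equal, decides on the low words with an unsigned condition (kCondCc, carry-clear, i.e. unsigned lower). A minimal executable model of the kCondLt case, assuming nothing beyond standard C++:

```cpp
#include <cstdint>
#include <cassert>

// Illustrative model (not ART code) of GenFusedLongCmpImmBranch's kCondLt case.
static bool SignedLongLessThan(int64_t x, int64_t val) {
  int32_t x_hi = static_cast<int32_t>(static_cast<uint64_t>(x) >> 32);
  uint32_t x_lo = static_cast<uint32_t>(x);
  int32_t v_hi = static_cast<int32_t>(static_cast<uint64_t>(val) >> 32);
  uint32_t v_lo = static_cast<uint32_t>(val);
  if (x_hi < v_hi) return true;   // OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, taken)
  if (x_hi > v_hi) return false;  // OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, not_taken)
  return x_lo < v_lo;             // OpCmpImmBranch(cu, kCondCc, low_reg, val_lo, taken)
}

int main() {
  assert(SignedLongLessThan(-1, 0));
  assert(!SignedLongLessThan(0x100000000LL, 0xffffffffLL));
  assert(SignedLongLessThan(0x1ffffffffLL, 0x200000000LL));
  return 0;
}
```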
@@ -185,7 +250,7 @@ LIR* ArmCodegen::OpCmpImmBranch(CompilationUnit* cu, ConditionCode cond, int reg
   if (ARM_LOWREG(reg) && ((check_value & 0xff) == check_value)) {
     NewLIR2(cu, kThumbCmpRI8, reg, check_value);
   } else if (mod_imm >= 0) {
-    NewLIR2(cu, kThumb2CmpRI8, reg, mod_imm);
+    NewLIR2(cu, kThumb2CmpRI12, reg, mod_imm);
   } else {
     int t_reg = AllocTemp(cu);
     LoadConstant(cu, t_reg, check_value);
@@ -523,6 +588,93 @@ bool ArmCodegen::GenNegLong(CompilationUnit* cu, RegLocation rl_dest, RegLocatio
   return false;
 }
 
+
+/*
+ * Check to see if a result pair has a misaligned overlap with an operand pair.  This
+ * is not usual for dx to generate, but it is legal (for now).  In a future rev of
+ * dex, we'll want to make this case illegal.
+ */
+static bool BadOverlap(CompilationUnit* cu, RegLocation rl_src, RegLocation rl_dest)
+{
+  DCHECK(rl_src.wide);
+  DCHECK(rl_dest.wide);
+  return (abs(SRegToVReg(cu, rl_src.s_reg_low) - SRegToVReg(cu, rl_dest.s_reg_low)) == 1);
+}
+
+void ArmCodegen::GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2)
+{
+  /*
+   * To pull off inline multiply, we have a worst-case requirement of 8 temporary
+   * registers.  Normally for Arm, we get 5.  We can get to 6 by including
+   * lr in the temp set.  The only problematic case is all operands and result are
+   * distinct, and none have been promoted.  In that case, we can succeed by aggressively
+   * freeing operand temp registers after they are no longer needed.  All other cases
+   * can proceed normally.  We'll just punt on the case of the result having a misaligned
+   * overlap with either operand and send that case to a runtime handler.
+   */
+  RegLocation rl_result;
+  if (BadOverlap(cu, rl_src1, rl_dest) || (BadOverlap(cu, rl_src2, rl_dest))) {
+    int func_offset = ENTRYPOINT_OFFSET(pLmul);
+    FlushAllRegs(cu);
+    CallRuntimeHelperRegLocationRegLocation(cu, func_offset, rl_src1, rl_src2, false);
+    rl_result = GetReturnWide(cu, false);
+    StoreValueWide(cu, rl_dest, rl_result);
+    return;
+  }
+  // Temporarily add LR to the temp pool, and assign it to tmp1
+  MarkTemp(cu, rARM_LR);
+  FreeTemp(cu, rARM_LR);
+  int tmp1 = rARM_LR;
+  LockTemp(cu, rARM_LR);
+
+  rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg);
+  rl_src2 = LoadValueWide(cu, rl_src2, kCoreReg);
+
+  bool special_case = true;
+  // If operands are the same, or any pair has been promoted we're not the special case.
+  if ((rl_src1.s_reg_low == rl_src2.s_reg_low) ||
+      (!IsTemp(cu, rl_src1.low_reg) && !IsTemp(cu, rl_src1.high_reg)) ||
+      (!IsTemp(cu, rl_src2.low_reg) && !IsTemp(cu, rl_src2.high_reg))) {
+    special_case = false;
+  }
+  // Tuning: if rl_dest has been promoted and is *not* either operand, could use directly.
+  int res_lo = AllocTemp(cu);
+  int res_hi;
+  if (rl_src1.low_reg == rl_src2.low_reg) {
+    res_hi = AllocTemp(cu);
+    NewLIR3(cu, kThumb2MulRRR, tmp1, rl_src1.low_reg, rl_src1.high_reg);
+    NewLIR4(cu, kThumb2Umull, res_lo, res_hi, rl_src1.low_reg, rl_src1.low_reg);
+    OpRegRegRegShift(cu, kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1));
+  } else {
+    // In the special case, all temps are now allocated
+    NewLIR3(cu, kThumb2MulRRR, tmp1, rl_src2.low_reg, rl_src1.high_reg);
+    if (special_case) {
+      DCHECK_NE(rl_src1.low_reg, rl_src2.low_reg);
+      DCHECK_NE(rl_src1.high_reg, rl_src2.high_reg);
+      FreeTemp(cu, rl_src1.high_reg);
+    }
+    res_hi = AllocTemp(cu);
+
+    NewLIR4(cu, kThumb2Umull, res_lo, res_hi, rl_src2.low_reg, rl_src1.low_reg);
+    NewLIR4(cu, kThumb2Mla, tmp1, rl_src1.low_reg, rl_src2.high_reg, tmp1);
+    NewLIR4(cu, kThumb2AddRRR, res_hi, tmp1, res_hi, 0);
+    if (special_case) {
+      FreeTemp(cu, rl_src1.low_reg);
+      Clobber(cu, rl_src1.low_reg);
+      Clobber(cu, rl_src1.high_reg);
+    }
+  }
+  FreeTemp(cu, tmp1);
+  rl_result = GetReturnWide(cu, false);  // Just using as a template.
+  rl_result.low_reg = res_lo;
+  rl_result.high_reg = res_hi;
+  StoreValueWide(cu, rl_dest, rl_result);
+  // Now, restore lr to its non-temp status.
+  Clobber(cu, rARM_LR);
+  UnmarkTemp(cu, rARM_LR);
+}
+
 bool ArmCodegen::GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2)
 {
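The multiply sequence above rests on the identity (hi1·2^32 + lo1)(hi2·2^32 + lo2) = lo1·lo2 + 2^32·(lo1·hi2 + hi1·lo2), taken mod 2^64. An illustrative model of the distinct-operand path, not ART code:

```cpp
#include <cstdint>
#include <cassert>

// Models the instruction sequence GenMulLong emits:
//   mul   tmp1, lo2, hi1              (kThumb2MulRRR)
//   umull res_lo, res_hi, lo2, lo1    (kThumb2Umull: full 64-bit low product)
//   mla   tmp1, lo1, hi2, tmp1        (kThumb2Mla: accumulate cross terms)
//   add   res_hi, tmp1, res_hi        (kThumb2AddRRR)
static int64_t MulLongModel(int64_t a, int64_t b) {
  uint32_t lo1 = static_cast<uint32_t>(a);
  uint32_t hi1 = static_cast<uint32_t>(static_cast<uint64_t>(a) >> 32);
  uint32_t lo2 = static_cast<uint32_t>(b);
  uint32_t hi2 = static_cast<uint32_t>(static_cast<uint64_t>(b) >> 32);
  uint32_t tmp1 = lo2 * hi1;                                // mul (mod 2^32)
  uint64_t low_product = static_cast<uint64_t>(lo2) * lo1;  // umull
  tmp1 = lo1 * hi2 + tmp1;                                  // mla
  uint32_t res_lo = static_cast<uint32_t>(low_product);
  uint32_t res_hi = tmp1 + static_cast<uint32_t>(low_product >> 32);  // add
  return static_cast<int64_t>((static_cast<uint64_t>(res_hi) << 32) | res_lo);
}

int main() {
  assert(MulLongModel(0x123456789LL, -42LL) ==
         static_cast<int64_t>(0x123456789ULL * static_cast<uint64_t>(-42LL)));
  return 0;
}
```

When the two operands are the same pair, the code instead computes the cross term once and doubles it with an add-with-shift (EncodeShift(kArmLsl, 1)), since lo·hi + hi·lo = 2·lo·hi.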
@@ -568,8 +720,11 @@ void ArmCodegen::GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, Re
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
   RegLocation rl_result;
+  bool constant_index = rl_index.is_const;
   rl_array = LoadValue(cu, rl_array, kCoreReg);
-  rl_index = LoadValue(cu, rl_index, kCoreReg);
+  if (!constant_index) {
+    rl_index = LoadValue(cu, rl_index, kCoreReg);
+  }
 
   if (rl_dest.wide) {
     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
@@ -577,6 +732,11 @@ void ArmCodegen::GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, Re
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   }
 
+  // If index is constant, just fold it into the data offset
+  if (constant_index) {
+    data_offset += ConstantValue(cu, rl_index) << scale;
+  }
+
   /* null object? */
   GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
@@ -587,27 +747,38 @@ void ArmCodegen::GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, Re
     /* Get len */
     LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
   }
-  if (rl_dest.wide || rl_dest.fp) {
-    // No special indexed operation, lea + load w/ displacement
-    int reg_ptr = AllocTemp(cu);
-    OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
-                     EncodeShift(kArmLsl, scale));
-    FreeTemp(cu, rl_index.low_reg);
+  if (rl_dest.wide || rl_dest.fp || constant_index) {
+    int reg_ptr;
+    if (constant_index) {
+      reg_ptr = rl_array.low_reg;  // NOTE: must not alter reg_ptr in constant case.
+    } else {
+      // No special indexed operation, lea + load w/ displacement
+      reg_ptr = AllocTemp(cu);
+      OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
+                       EncodeShift(kArmLsl, scale));
+      FreeTemp(cu, rl_index.low_reg);
+    }
     rl_result = EvalLoc(cu, rl_dest, reg_class, true);
 
     if (needs_range_check) {
-      // TODO: change kCondCS to a more meaningful name, is the sense of
-      // carry-set/clear flipped?
-      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      if (constant_index) {
+        GenImmedCheck(cu, kCondLs, reg_len, ConstantValue(cu, rl_index), kThrowConstantArrayBounds);
+      } else {
+        GenRegRegCheck(cu, kCondLs, reg_len, rl_index.low_reg, kThrowArrayBounds);
+      }
       FreeTemp(cu, reg_len);
     }
     if (rl_dest.wide) {
       LoadBaseDispWide(cu, reg_ptr, data_offset, rl_result.low_reg, rl_result.high_reg,
                        INVALID_SREG);
-      FreeTemp(cu, reg_ptr);
+      if (!constant_index) {
+        FreeTemp(cu, reg_ptr);
+      }
       StoreValueWide(cu, rl_dest, rl_result);
     } else {
       LoadBaseDisp(cu, reg_ptr, data_offset, rl_result.low_reg, size, INVALID_SREG);
-      FreeTemp(cu, reg_ptr);
+      if (!constant_index) {
+        FreeTemp(cu, reg_ptr);
+      }
       StoreValue(cu, rl_dest, rl_result);
     }
   } else {
@@ -639,17 +810,28 @@ void ArmCodegen::GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, Re
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
+  bool constant_index = rl_index.is_const;
 
-  if (size == kLong || size == kDouble) {
+  if (rl_src.wide) {
     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   } else {
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   }
 
+  // If index is constant, just fold it into the data offset.
+  if (constant_index) {
+    data_offset += ConstantValue(cu, rl_index) << scale;
+  }
+
   rl_array = LoadValue(cu, rl_array, kCoreReg);
-  rl_index = LoadValue(cu, rl_index, kCoreReg);
-  int reg_ptr = INVALID_REG;
-  if (IsTemp(cu, rl_array.low_reg)) {
+  if (!constant_index) {
+    rl_index = LoadValue(cu, rl_index, kCoreReg);
+  }
+
+  int reg_ptr;
+  if (constant_index) {
+    reg_ptr = rl_array.low_reg;
+  } else if (IsTemp(cu, rl_array.low_reg)) {
     Clobber(cu, rl_array.low_reg);
     reg_ptr = rl_array.low_reg;
   } else {
@@ -668,18 +850,25 @@ void ArmCodegen::GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, Re
     LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
   }
   /* at this point, reg_ptr points to array, 2 live temps */
-  if (rl_src.wide || rl_src.fp) {
+  if (rl_src.wide || rl_src.fp || constant_index) {
     if (rl_src.wide) {
       rl_src = LoadValueWide(cu, rl_src, reg_class);
     } else {
       rl_src = LoadValue(cu, rl_src, reg_class);
     }
-    OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
-                     EncodeShift(kArmLsl, scale));
+    if (!constant_index) {
+      OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
+                       EncodeShift(kArmLsl, scale));
+    }
     if (needs_range_check) {
-      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      if (constant_index) {
+        GenImmedCheck(cu, kCondLs, reg_len, ConstantValue(cu, rl_index), kThrowConstantArrayBounds);
+      } else {
+        GenRegRegCheck(cu, kCondLs, reg_len, rl_index.low_reg, kThrowArrayBounds);
+      }
       FreeTemp(cu, reg_len);
     }
+
     if (rl_src.wide) {
       StoreBaseDispWide(cu, reg_ptr, data_offset, rl_src.low_reg, rl_src.high_reg);
     } else {
@@ -696,7 +885,9 @@ void ArmCodegen::GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, Re
     StoreBaseIndexed(cu, reg_ptr, rl_index.low_reg, rl_src.low_reg, scale, size);
   }
-  FreeTemp(cu, reg_ptr);
+  if (!constant_index) {
+    FreeTemp(cu, reg_ptr);
+  }
 }
 
 /*
@@ -758,4 +949,163 @@ void ArmCodegen::GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation
   MarkGCCard(cu, r_value, r_array);
 }
 
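The array-access changes above exploit a constant index by folding the scaled element offset into the load/store displacement at compile time, so the add-with-shift and the index register disappear and the array pointer itself serves as the base. A sketch of the addressing math (names hypothetical, not ART code):

```cpp
#include <cstdint>
#include <cassert>

// With a constant index the generated access becomes
//   ldr rResult, [rArray, #(data_offset + (index << scale))]
// and the bounds check compares the length register against an immediate.
static uintptr_t ElementAddress(uintptr_t array_base, int32_t data_offset,
                                int32_t const_index, int scale) {
  return array_base + data_offset +
         (static_cast<uintptr_t>(const_index) << scale);
}

int main() {
  // e.g. element 3 of an int[] with a 16-byte data offset and scale 2: base + 28.
  assert(ElementAddress(0x1000, 16, 3, 2) == 0x1000 + 28);
  return 0;
}
```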
+bool ArmCodegen::GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift)
+{
+  rl_src = LoadValueWide(cu, rl_src, kCoreReg);
+  // Per spec, we only care about low 6 bits of shift amount.
+  int shift_amount = ConstantValue(cu, rl_shift) & 0x3f;
+  if (shift_amount == 0) {
+    StoreValueWide(cu, rl_dest, rl_src);
+    return false;  // TODO: remove useless bool return result.
+  }
+  if (BadOverlap(cu, rl_src, rl_dest)) {
+    return GenShiftOpLong(cu, opcode, rl_dest, rl_src, rl_shift);
+  }
+  RegLocation rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
+  switch(opcode) {
+    case Instruction::SHL_LONG:
+    case Instruction::SHL_LONG_2ADDR:
+      if (shift_amount == 1) {
+        OpRegRegReg(cu, kOpAdd, rl_result.low_reg, rl_src.low_reg, rl_src.low_reg);
+        OpRegRegReg(cu, kOpAdc, rl_result.high_reg, rl_src.high_reg, rl_src.high_reg);
+      } else if (shift_amount == 32) {
+        OpRegCopy(cu, rl_result.high_reg, rl_src.low_reg);
+        LoadConstant(cu, rl_result.low_reg, 0);
+      } else if (shift_amount > 31) {
+        OpRegRegImm(cu, kOpLsl, rl_result.high_reg, rl_src.low_reg, shift_amount - 32);
+        LoadConstant(cu, rl_result.low_reg, 0);
+      } else {
+        OpRegRegImm(cu, kOpLsl, rl_result.high_reg, rl_src.high_reg, shift_amount);
+        OpRegRegRegShift(cu, kOpOr, rl_result.high_reg, rl_result.high_reg, rl_src.low_reg,
+                         EncodeShift(kArmLsr, 32 - shift_amount));
+        OpRegRegImm(cu, kOpLsl, rl_result.low_reg, rl_src.low_reg, shift_amount);
+      }
+      break;
+    case Instruction::SHR_LONG:
+    case Instruction::SHR_LONG_2ADDR:
+      if (shift_amount == 32) {
+        OpRegCopy(cu, rl_result.low_reg, rl_src.high_reg);
+        OpRegRegImm(cu, kOpAsr, rl_result.high_reg, rl_src.high_reg, 31);
+      } else if (shift_amount > 31) {
+        OpRegRegImm(cu, kOpAsr, rl_result.low_reg, rl_src.high_reg, shift_amount - 32);
+        OpRegRegImm(cu, kOpAsr, rl_result.high_reg, rl_src.high_reg, 31);
+      } else {
+        int t_reg = AllocTemp(cu);
+        OpRegRegImm(cu, kOpLsr, t_reg, rl_src.low_reg, shift_amount);
+        OpRegRegRegShift(cu, kOpOr, rl_result.low_reg, t_reg, rl_src.high_reg,
+                         EncodeShift(kArmLsl, 32 - shift_amount));
+        FreeTemp(cu, t_reg);
+        OpRegRegImm(cu, kOpAsr, rl_result.high_reg, rl_src.high_reg, shift_amount);
+      }
+      break;
+    case Instruction::USHR_LONG:
+    case Instruction::USHR_LONG_2ADDR:
+      if (shift_amount == 32) {
+        OpRegCopy(cu, rl_result.low_reg, rl_src.high_reg);
+        LoadConstant(cu, rl_result.high_reg, 0);
+      } else if (shift_amount > 31) {
+        OpRegRegImm(cu, kOpLsr, rl_result.low_reg, rl_src.high_reg, shift_amount - 32);
+        LoadConstant(cu, rl_result.high_reg, 0);
+      } else {
+        int t_reg = AllocTemp(cu);
+        OpRegRegImm(cu, kOpLsr, t_reg, rl_src.low_reg, shift_amount);
+        OpRegRegRegShift(cu, kOpOr, rl_result.low_reg, t_reg, rl_src.high_reg,
+                         EncodeShift(kArmLsl, 32 - shift_amount));
+        FreeTemp(cu, t_reg);
+        OpRegRegImm(cu, kOpLsr, rl_result.high_reg, rl_src.high_reg, shift_amount);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected case";
+      return true;
+  }
+  StoreValueWide(cu, rl_dest, rl_result);
+  return false;
+}
+
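GenShiftImmOpLong splits a 64-bit shift by a known amount into 32-bit operations, with three regimes: exactly 32 (a word move), more than 32 (one shift of the low word), and less than 32 (two shifts plus an or of the bits that cross the word boundary). An executable model of the SHL cases, not ART code (the shift-by-1 adds/adc special case produces the same result and is omitted):

```cpp
#include <cstdint>
#include <cassert>

// Models the SHL_LONG cases once the amount is masked to 6 bits and nonzero.
static int64_t ShlLongModel(int64_t x, int shift_amount) {
  uint32_t lo = static_cast<uint32_t>(x);
  uint32_t hi = static_cast<uint32_t>(static_cast<uint64_t>(x) >> 32);
  uint32_t res_lo, res_hi;
  if (shift_amount == 32) {        // exact word move
    res_hi = lo;
    res_lo = 0;
  } else if (shift_amount > 31) {  // only the low word survives, shifted up
    res_hi = lo << (shift_amount - 32);
    res_lo = 0;
  } else {                         // general case: or in the bits that cross
    res_hi = (hi << shift_amount) | (lo >> (32 - shift_amount));
    res_lo = lo << shift_amount;
  }
  return static_cast<int64_t>((static_cast<uint64_t>(res_hi) << 32) | res_lo);
}

int main() {
  const uint64_t v = 0x123456789abcdef0ULL;
  for (int s = 1; s < 64; ++s) {
    assert(ShlLongModel(static_cast<int64_t>(v), s) == static_cast<int64_t>(v << s));
  }
  return 0;
}
```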
+bool ArmCodegen::GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2)
+{
+  if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
+    if (!rl_src2.is_const) {
+      // Don't bother with special handling for subtract from immediate.
+      return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2);
+    }
+  } else {
+    // Normalize
+    if (!rl_src2.is_const) {
+      DCHECK(rl_src1.is_const);
+      RegLocation rl_temp = rl_src1;
+      rl_src1 = rl_src2;
+      rl_src2 = rl_temp;
+    }
+  }
+  if (BadOverlap(cu, rl_src1, rl_dest)) {
+    return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2);
+  }
+  DCHECK(rl_src2.is_const);
+  int64_t val = ConstantValueWide(cu, rl_src2);
+  uint32_t val_lo = Low32Bits(val);
+  uint32_t val_hi = High32Bits(val);
+  int32_t mod_imm_lo = ModifiedImmediate(val_lo);
+  int32_t mod_imm_hi = ModifiedImmediate(val_hi);
+
+  // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
+  switch(opcode) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
+        return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2);
+      }
+      break;
+    default:
+      break;
+  }
+  rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg);
+  RegLocation rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
+  // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
+  switch (opcode) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+      NewLIR3(cu, kThumb2AddRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
+      NewLIR3(cu, kThumb2AdcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
+      break;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      if ((val_lo != 0) || (rl_result.low_reg != rl_src1.low_reg)) {
+        OpRegRegImm(cu, kOpOr, rl_result.low_reg, rl_src1.low_reg, val_lo);
+      }
+      if ((val_hi != 0) || (rl_result.high_reg != rl_src1.high_reg)) {
+        OpRegRegImm(cu, kOpOr, rl_result.high_reg, rl_src1.high_reg, val_hi);
+      }
+      break;
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      OpRegRegImm(cu, kOpXor, rl_result.low_reg, rl_src1.low_reg, val_lo);
+      OpRegRegImm(cu, kOpXor, rl_result.high_reg, rl_src1.high_reg, val_hi);
+      break;
+    case Instruction::AND_LONG:
+    case Instruction::AND_LONG_2ADDR:
+      if ((val_lo != 0xffffffff) || (rl_result.low_reg != rl_src1.low_reg)) {
+        OpRegRegImm(cu, kOpAnd, rl_result.low_reg, rl_src1.low_reg, val_lo);
+      }
+      if ((val_hi != 0xffffffff) || (rl_result.high_reg != rl_src1.high_reg)) {
+        OpRegRegImm(cu, kOpAnd, rl_result.high_reg, rl_src1.high_reg, val_hi);
+      }
+      break;
+    case Instruction::SUB_LONG_2ADDR:
+    case Instruction::SUB_LONG:
+      NewLIR3(cu, kThumb2SubRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
+      NewLIR3(cu, kThumb2SbcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected opcode " << opcode;
+  }
+  StoreValueWide(cu, rl_dest, rl_result);
+  return false;  // TODO: remove bool return value from all of these Gen routines.
+}
+
 }  // namespace art
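The add/sub paths above work because the low-word instruction sets the carry flag and the high-word instruction consumes it; that is also why the code bails to GenArithOpLong when either half fails the modified-immediate test (only the flag-setting adds/subs forms accept those immediates). A minimal model of the adds/adc pair, not ART code:

```cpp
#include <cstdint>
#include <cassert>

// Models: adds res_lo, src_lo, #imm_lo  (sets carry)
//         adc  res_hi, src_hi, #imm_hi  (consumes carry)
static int64_t AddLongViaPair(int64_t src, int64_t imm) {
  uint32_t src_lo = static_cast<uint32_t>(src);
  uint32_t src_hi = static_cast<uint32_t>(static_cast<uint64_t>(src) >> 32);
  uint32_t imm_lo = static_cast<uint32_t>(imm);
  uint32_t imm_hi = static_cast<uint32_t>(static_cast<uint64_t>(imm) >> 32);
  uint32_t res_lo = src_lo + imm_lo;
  uint32_t carry = (res_lo < src_lo) ? 1u : 0u;  // unsigned overflow out of the low add
  uint32_t res_hi = src_hi + imm_hi + carry;
  return static_cast<int64_t>((static_cast<uint64_t>(res_hi) << 32) | res_lo);
}

int main() {
  assert(AddLongViaPair(0x00000000ffffffffLL, 1) == 0x0000000100000000LL);
  assert(AddLongViaPair(-2LL, 3) == 1LL);
  return 0;
}
```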
diff --git a/src/compiler/codegen/arm/utility_arm.cc b/src/compiler/codegen/arm/utility_arm.cc
index 433111c528..a670199580 100644
--- a/src/compiler/codegen/arm/utility_arm.cc
+++ b/src/compiler/codegen/arm/utility_arm.cc
@@ -45,6 +45,32 @@ static int EncodeImmSingle(int value)
   return res;
 }
 
+/*
+ * Determine whether value can be encoded as a Thumb2 floating point
+ * immediate.  If not, return -1.  If so return encoded 8-bit value.
+ */
+static int EncodeImmDouble(int64_t value)
+{
+  int res;
+  int bit_a = (value & 0x8000000000000000ll) >> 63;
+  int not_bit_b = (value & 0x4000000000000000ll) >> 62;
+  int bit_b = (value & 0x2000000000000000ll) >> 61;
+  int b_smear = (value & 0x3fc0000000000000ll) >> 54;
+  int slice = (value & 0x003f000000000000ll) >> 48;
+  uint64_t zeroes = (value & 0x0000ffffffffffffll);
+  if (zeroes != 0)
+    return -1;
+  if (bit_b) {
+    if ((not_bit_b != 0) || (b_smear != 0xff))
+      return -1;
+  } else {
+    if ((not_bit_b != 1) || (b_smear != 0x0))
+      return -1;
+  }
+  res = (bit_a << 7) | (bit_b << 6) | slice;
+  return res;
+}
+
 static LIR* LoadFPConstantValue(CompilationUnit* cu, int r_dest, int value)
 {
   DCHECK(ARM_SINGLEREG(r_dest));
@@ -126,19 +152,24 @@ int ArmCodegen::ModifiedImmediate(uint32_t value)
   return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */
 }
 
-bool ArmCodegen::InexpensiveConstant(int reg, int value)
+bool ArmCodegen::InexpensiveConstantInt(int32_t value)
 {
-  bool res = false;
-  if (ARM_FPREG(reg)) {
-    res = (EncodeImmSingle(value) >= 0);
-  } else {
-    if (ARM_LOWREG(reg) && (value >= 0) && (IsUint(8, value))) {
-      res = true;
-    } else {
-      res = (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
-    }
-  }
-  return res;
+  return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+}
+
+bool ArmCodegen::InexpensiveConstantFloat(int32_t value)
+{
+  return EncodeImmSingle(value) >= 0;
+}
+
+bool ArmCodegen::InexpensiveConstantLong(int64_t value)
+{
+  return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value));
+}
+
+bool ArmCodegen::InexpensiveConstantDouble(int64_t value)
+{
+  return EncodeImmDouble(value) >= 0;
 }
 
 /*
@@ -178,25 +209,9 @@ LIR* ArmCodegen::LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int valu
     res = NewLIR2(cu, kThumb2MovImm16, r_dest, value);
     return res;
   }
-  /* No shortcut - go ahead and use literal pool */
-  LIR* data_target = ScanLiteralPool(cu->literal_list, value, 0);
-  if (data_target == NULL) {
-    data_target = AddWordData(cu, &cu->literal_list, value);
-  }
-  LIR* load_pc_rel = RawLIR(cu, cu->current_dalvik_offset,
-                            kThumb2LdrPcRel12, r_dest, 0, 0, 0, 0, data_target);
-  SetMemRefType(cu, load_pc_rel, true, kLiteral);
-  load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target);
-  res = load_pc_rel;
-  AppendLIR(cu, load_pc_rel);
-
-  /*
-   * To save space in the constant pool, we use the ADD_RRI8 instruction to
-   * add up to 255 to an existing constant value.
-   */
-  if (data_target->operands[0] != value) {
-    OpRegImm(cu, kOpAdd, r_dest, value - data_target->operands[0]);
-  }
+  /* Do a low/high pair */
+  res = NewLIR2(cu, kThumb2MovImm16, r_dest, Low16Bits(value));
+  NewLIR2(cu, kThumb2MovImm16H, r_dest, High16Bits(value));
   return res;
 }
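Note how LoadConstantNoClobber now replaces the literal-pool fallback with a two-instruction movw/movt pair. A minimal model of that pair, not ART code; the REG_USE0 added to kThumb2MovImm16H in assemble_arm.cc reflects that movt only writes the top halfword, so the register's low half must be treated as live:

```cpp
#include <cstdint>
#include <cassert>

// Models: movw r_dest, #Low16Bits(value)   (writes all 32 bits, zero-extended)
//         movt r_dest, #High16Bits(value)  (writes only the top 16 bits)
static uint32_t MaterializeViaMovwMovt(uint32_t value) {
  uint32_t r_dest = value & 0xffffu;                    // movw
  r_dest = (r_dest & 0xffffu) | (value & 0xffff0000u);  // movt preserves the low half
  return r_dest;
}

int main() {
  assert(MaterializeViaMovwMovt(0xdeadbeefu) == 0xdeadbeefu);
  return 0;
}
```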
@@ -514,7 +529,7 @@ LIR* ArmCodegen::OpRegRegImm(CompilationUnit* cu, OpKind op, int r_dest, int r_s
       int mod_imm = ModifiedImmediate(value);
       LIR* res;
       if (mod_imm >= 0) {
-        res = NewLIR2(cu, kThumb2CmpRI8, r_src1, mod_imm);
+        res = NewLIR2(cu, kThumb2CmpRI12, r_src1, mod_imm);
       } else {
         int r_tmp = AllocTemp(cu);
         res = LoadConstant(cu, r_tmp, value);
@@ -587,44 +602,11 @@ LIR* ArmCodegen::OpRegImm(CompilationUnit* cu, OpKind op, int r_dest_src1, int v
   }
 }
 
-/*
- * Determine whether value can be encoded as a Thumb2 floating point
- * immediate.  If not, return -1.  If so return encoded 8-bit value.
- */
-static int EncodeImmDoubleHigh(int value)
+LIR* ArmCodegen::LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value)
 {
-  int res;
-  int bit_a = (value & 0x80000000) >> 31;
-  int not_bit_b = (value & 0x40000000) >> 30;
-  int bit_b = (value & 0x20000000) >> 29;
-  int b_smear = (value & 0x3fc00000) >> 22;
-  int slice = (value & 0x003f0000) >> 16;
-  int zeroes = (value & 0x0000ffff);
-  if (zeroes != 0)
-    return -1;
-  if (bit_b) {
-    if ((not_bit_b != 0) || (b_smear != 0xff))
-      return -1;
-  } else {
-    if ((not_bit_b != 1) || (b_smear != 0x0))
-      return -1;
-  }
-  res = (bit_a << 7) | (bit_b << 6) | slice;
-  return res;
-}
-
-static int EncodeImmDouble(int val_lo, int val_hi)
-{
-  int res = -1;
-  if (val_lo == 0)
-    res = EncodeImmDoubleHigh(val_hi);
-  return res;
-}
-
-LIR* ArmCodegen::LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
-                                       int val_lo, int val_hi)
-{
-  LIR* res;
+  LIR* res = NULL;
+  int32_t val_lo = Low32Bits(value);
+  int32_t val_hi = High32Bits(value);
   int target_reg = S2d(r_dest_lo, r_dest_hi);
   if (ARM_FPREG(r_dest_lo)) {
     if ((val_lo == 0) && (val_hi == 0)) {
@@ -635,26 +617,33 @@ LIR* ArmCodegen::LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r
       // +0.0 = +2.0 - +2.0
       res = NewLIR3(cu, kThumb2Vsubd, target_reg, target_reg, target_reg);
     } else {
-      int encoded_imm = EncodeImmDouble(val_lo, val_hi);
+      int encoded_imm = EncodeImmDouble(value);
       if (encoded_imm >= 0) {
         res = NewLIR2(cu, kThumb2Vmovd_IMM8, target_reg, encoded_imm);
-      } else {
-        LIR* data_target = ScanLiteralPoolWide(cu->literal_list, val_lo, val_hi);
-        if (data_target == NULL) {
-          data_target = AddWideData(cu, &cu->literal_list, val_lo, val_hi);
-        }
-        LIR* load_pc_rel =
-            RawLIR(cu, cu->current_dalvik_offset, kThumb2Vldrd,
-                   target_reg, r15pc, 0, 0, 0, data_target);
-        SetMemRefType(cu, load_pc_rel, true, kLiteral);
-        load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target);
-        AppendLIR(cu, load_pc_rel);
-        res = load_pc_rel;
       }
     }
   } else {
-    res = LoadConstantNoClobber(cu, r_dest_lo, val_lo);
-    LoadConstantNoClobber(cu, r_dest_hi, val_hi);
+    if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) {
+      res = LoadConstantNoClobber(cu, r_dest_lo, val_lo);
+      LoadConstantNoClobber(cu, r_dest_hi, val_hi);
+    }
+  }
+  if (res == NULL) {
+    // No short form - load from the literal pool.
+    LIR* data_target = ScanLiteralPoolWide(cu->literal_list, val_lo, val_hi);
+    if (data_target == NULL) {
+      data_target = AddWideData(cu, &cu->literal_list, val_lo, val_hi);
+    }
+    if (ARM_FPREG(r_dest_lo)) {
+      res = RawLIR(cu, cu->current_dalvik_offset, kThumb2Vldrd,
+                   target_reg, r15pc, 0, 0, 0, data_target);
+    } else {
+      res = RawLIR(cu, cu->current_dalvik_offset, kThumb2LdrdPcRel8,
+                   r_dest_lo, r_dest_hi, r15pc, 0, 0, data_target);
+    }
+    SetMemRefType(cu, res, true, kLiteral);
+    res->alias_info = reinterpret_cast<uintptr_t>(data_target);
+    AppendLIR(cu, res);
   }
   return res;
 }
@@ -732,7 +721,7 @@ LIR* ArmCodegen::StoreBaseIndexed(CompilationUnit* cu, int rBase, int r_index, i
                                   int scale, OpSize size)
 {
   bool all_low_regs = ARM_LOWREG(rBase) && ARM_LOWREG(r_index) && ARM_LOWREG(r_src);
-  LIR* store;
+  LIR* store = NULL;
   ArmOpcode opcode = kThumbBkpt;
   bool thumb_form = (all_low_regs && (scale == 0));
   int reg_ptr;
@@ -798,14 +787,14 @@ LIR* ArmCodegen::LoadBaseDispBody(CompilationUnit* cu, int rBase, int displaceme
                                   int r_dest_hi, OpSize size, int s_reg)
 {
   Codegen* cg = cu->cg.get();
-  LIR* res;
-  LIR* load;
+  LIR* load = NULL;
   ArmOpcode opcode = kThumbBkpt;
   bool short_form = false;
   bool thumb2Form = (displacement < 4092 && displacement >= 0);
   bool all_low_regs = (ARM_LOWREG(rBase) && ARM_LOWREG(r_dest));
   int encoded_disp = displacement;
   bool is64bit = false;
+  bool already_generated = false;
   switch (size) {
     case kDouble:
     case kLong:
@@ -822,11 +811,15 @@ LIR* ArmCodegen::LoadBaseDispBody(CompilationUnit* cu, int rBase, int displaceme
         }
         break;
       } else {
-        res = LoadBaseDispBody(cu, rBase, displacement, r_dest,
-                               -1, kWord, s_reg);
-        LoadBaseDispBody(cu, rBase, displacement + 4, r_dest_hi,
-                         -1, kWord, INVALID_SREG);
-        return res;
+        if (displacement <= 1020) {
+          load = NewLIR4(cu, kThumb2LdrdI8, r_dest, r_dest_hi, rBase, displacement >> 2);
+        } else {
+          load = LoadBaseDispBody(cu, rBase, displacement, r_dest,
+                                  -1, kWord, s_reg);
+          LoadBaseDispBody(cu, rBase, displacement + 4, r_dest_hi,
+                           -1, kWord, INVALID_SREG);
+        }
+        already_generated = true;
       }
     case kSingle:
     case kWord:
@@ -894,13 +887,15 @@ LIR* ArmCodegen::LoadBaseDispBody(CompilationUnit* cu, int rBase, int displaceme
       LOG(FATAL) << "Bad size: " << size;
   }
 
-  if (short_form) {
-    load = res = NewLIR3(cu, opcode, r_dest, rBase, encoded_disp);
-  } else {
-    int reg_offset = AllocTemp(cu);
-    res = cg->LoadConstant(cu, reg_offset, encoded_disp);
-    load = cg->LoadBaseIndexed(cu, rBase, reg_offset, r_dest, 0, size);
-    FreeTemp(cu, reg_offset);
+  if (!already_generated) {
+    if (short_form) {
+      load = NewLIR3(cu, opcode, r_dest, rBase, encoded_disp);
+    } else {
+      int reg_offset = AllocTemp(cu);
+      cg->LoadConstant(cu, reg_offset, encoded_disp);
+      load = cg->LoadBaseIndexed(cu, rBase, reg_offset, r_dest, 0, size);
+      FreeTemp(cu, reg_offset);
+    }
   }
 
   // TODO: in future may need to differentiate Dalvik accesses w/ spills
@@ -926,30 +921,36 @@ LIR* ArmCodegen::LoadBaseDispWide(CompilationUnit* cu, int rBase, int displaceme
 LIR* ArmCodegen::StoreBaseDispBody(CompilationUnit* cu, int rBase, int displacement,
                                    int r_src, int r_src_hi, OpSize size)
 {
   Codegen* cg = cu->cg.get();
-  LIR* res, *store;
+  LIR* store = NULL;
   ArmOpcode opcode = kThumbBkpt;
   bool short_form = false;
   bool thumb2Form = (displacement < 4092 && displacement >= 0);
   bool all_low_regs = (ARM_LOWREG(rBase) && ARM_LOWREG(r_src));
   int encoded_disp = displacement;
   bool is64bit = false;
+  bool already_generated = false;
   switch (size) {
     case kLong:
     case kDouble:
       is64bit = true;
       if (!ARM_FPREG(r_src)) {
-        res = StoreBaseDispBody(cu, rBase, displacement, r_src, -1, kWord);
-        StoreBaseDispBody(cu, rBase, displacement + 4, r_src_hi, -1, kWord);
-        return res;
-      }
-      if (ARM_SINGLEREG(r_src)) {
-        DCHECK(ARM_FPREG(r_src_hi));
-        r_src = cg->S2d(r_src, r_src_hi);
-      }
-      opcode = kThumb2Vstrd;
-      if (displacement <= 1020) {
-        short_form = true;
-        encoded_disp >>= 2;
+        if (displacement <= 1020) {
+          store = NewLIR4(cu, kThumb2StrdI8, r_src, r_src_hi, rBase, displacement >> 2);
+        } else {
+          store = StoreBaseDispBody(cu, rBase, displacement, r_src, -1, kWord);
+          StoreBaseDispBody(cu, rBase, displacement + 4, r_src_hi, -1, kWord);
+        }
+        already_generated = true;
+      } else {
+        if (ARM_SINGLEREG(r_src)) {
+          DCHECK(ARM_FPREG(r_src_hi));
+          r_src = cg->S2d(r_src, r_src_hi);
+        }
+        opcode = kThumb2Vstrd;
+        if (displacement <= 1020) {
+          short_form = true;
+          encoded_disp >>= 2;
+        }
       }
       break;
     case kSingle:
@@ -998,20 +999,22 @@ LIR* ArmCodegen::StoreBaseDispBody(CompilationUnit* cu, int rBase, int displacem
     default:
       LOG(FATAL) << "Bad size: " << size;
   }
-  if (short_form) {
-    store = res = NewLIR3(cu, opcode, r_src, rBase, encoded_disp);
-  } else {
-    int r_scratch = AllocTemp(cu);
-    res = cg->LoadConstant(cu, r_scratch, encoded_disp);
-    store = cg->StoreBaseIndexed(cu, rBase, r_scratch, r_src, 0, size);
-    FreeTemp(cu, r_scratch);
+  if (!already_generated) {
+    if (short_form) {
+      store = NewLIR3(cu, opcode, r_src, rBase, encoded_disp);
+    } else {
+      int r_scratch = AllocTemp(cu);
+      cg->LoadConstant(cu, r_scratch, encoded_disp);
+      store = cg->StoreBaseIndexed(cu, rBase, r_scratch, r_src, 0, size);
+      FreeTemp(cu, r_scratch);
+    }
  }
 
   // TODO: In future, may need to differentiate Dalvik & spill accesses
   if (rBase == rARM_SP) {
     AnnotateDalvikRegAccess(cu, store, displacement >> 2, false /* is_load */, is64bit);
   }
-  return res;
+  return store;
 }
 
 LIR* ArmCodegen::StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,