diff options
Diffstat (limited to 'compiler')
50 files changed, 3397 insertions, 538 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index 70c7e521a6..a75417bcbc 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -86,6 +86,7 @@ LIBART_COMPILER_SRC_FILES := \ jni/quick/jni_compiler.cc \ llvm/llvm_compiler.cc \ optimizing/builder.cc \ + optimizing/bounds_check_elimination.cc \ optimizing/code_generator.cc \ optimizing/code_generator_arm.cc \ optimizing/code_generator_arm64.cc \ diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 3b3170e511..a3fe8ad038 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -606,7 +606,7 @@ enum SelectInstructionKind { }; std::ostream& operator<<(std::ostream& os, const SelectInstructionKind& kind); -// LIR fixup kinds for Arm +// LIR fixup kinds for Arm and X86. enum FixupKind { kFixupNone, kFixupLabel, // For labels we just adjust the offset. @@ -624,6 +624,7 @@ enum FixupKind { kFixupMovImmHST, // kThumb2MovImm16HST. kFixupAlign4, // Align to 4-byte boundary. kFixupA53Erratum835769, // Cortex A53 Erratum 835769. + kFixupSwitchTable, // X86_64 packed switch table. }; std::ostream& operator<<(std::ostream& os, const FixupKind& kind); diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 55f2abcd3f..fd4c3d7d77 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -526,6 +526,9 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { default: LOG(ERROR) << "Unexpected opcode: " << opcode; } mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); + // Clear use count of temp VR. + use_counts_[mir->ssa_rep->defs[0]] = 0; + raw_use_counts_[mir->ssa_rep->defs[0]] = 0; // Copy the SSA information that is relevant. 
mir_next->ssa_rep->num_uses = mir->ssa_rep->num_uses; mir_next->ssa_rep->uses = mir->ssa_rep->uses; @@ -795,10 +798,6 @@ bool MIRGraph::LayoutBlocks(BasicBlock* bb) { break; } walker = prev; - - if (walker->visited) { - break; - } } return false; } diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h index 4c7f87433d..b9d9a1111d 100644 --- a/compiler/dex/quick/arm/arm_lir.h +++ b/compiler/dex/quick/arm/arm_lir.h @@ -488,6 +488,7 @@ enum ArmOpcode { kThumb2BicRRI8M, // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. kThumb2AndRRI8M, // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. kThumb2OrrRRI8M, // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. + kThumb2OrnRRI8M, // orn rd, rn, #<const> [11110] i [000110] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. kThumb2EorRRI8M, // eor rd, rn, #<const> [11110] i [001000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. kThumb2AddRRI8M, // add rd, rn, #<const> [11110] i [010001] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. kThumb2AdcRRI8M, // adc rd, rn, #<const> [11110] i [010101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. 
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index 76ec9dfbc5..de93e2602b 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -788,6 +788,10 @@ const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = { kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, "orr", "!0C, !1C, #!2m", 4, kFixupNone), + ENCODING_MAP(kThumb2OrnRRI8M, 0xf0600000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "orn", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2EorRRI8M, 0xf0800000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index c3b19a36f0..0ae7ee3560 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -219,10 +219,11 @@ class ArmMir2Lir FINAL : public Mir2Lir { int EncodeShift(int code, int amount); int ModifiedImmediate(uint32_t value); ArmConditionCode ArmConditionEncoding(ConditionCode code); - bool InexpensiveConstantInt(int32_t value); - bool InexpensiveConstantFloat(int32_t value); - bool InexpensiveConstantLong(int64_t value); - bool InexpensiveConstantDouble(int64_t value); + bool InexpensiveConstantInt(int32_t value) OVERRIDE; + bool InexpensiveConstantInt(int32_t value, Instruction::Code opcode) OVERRIDE; + bool InexpensiveConstantFloat(int32_t value) OVERRIDE; + bool InexpensiveConstantLong(int64_t value) OVERRIDE; + bool InexpensiveConstantDouble(int64_t value) OVERRIDE; RegStorage AllocPreservedDouble(int s_reg); RegStorage AllocPreservedSingle(int s_reg); diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 4aedbafcfb..1a7b4395d8 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ 
b/compiler/dex/quick/arm/int_arm.cc @@ -592,7 +592,6 @@ bool ArmMir2Lir::GetEasyMultiplyOp(int lit, ArmMir2Lir::EasyMultiplyOp* op) { // Try to convert *lit to 1~2 RegRegRegShift/RegRegShift forms. bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) { - GetEasyMultiplyOp(lit, &ops[0]); if (GetEasyMultiplyOp(lit, &ops[0])) { ops[1].op = kOpInvalid; ops[1].shift = 0; diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index 7168b9f30b..36d065f0db 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -97,31 +97,11 @@ LIR* ArmMir2Lir::LoadFPConstantValue(int r_dest, int value) { return load_pc_rel; } -static int LeadingZeros(uint32_t val) { - uint32_t alt; - int32_t n; - int32_t count; - - count = 16; - n = 32; - do { - alt = val >> count; - if (alt != 0) { - n = n - count; - val = alt; - } - count >>= 1; - } while (count); - return n - val; -} - /* * Determine whether value can be encoded as a Thumb2 modified * immediate. If not, return -1. If so, return i:imm3:a:bcdefgh form. */ int ArmMir2Lir::ModifiedImmediate(uint32_t value) { - int32_t z_leading; - int32_t z_trailing; uint32_t b0 = value & 0xff; /* Note: case of value==0 must use 0:000:0:0000000 encoding */ @@ -135,8 +115,8 @@ int ArmMir2Lir::ModifiedImmediate(uint32_t value) { if (value == ((b0 << 24) | (b0 << 8))) return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */ /* Can we do it with rotation? */ - z_leading = LeadingZeros(value); - z_trailing = 32 - LeadingZeros(~value & (value - 1)); + int z_leading = CLZ(value); + int z_trailing = CTZ(value); /* A run of eight or fewer active bits? 
*/ if ((z_leading + z_trailing) < 24) return -1; /* No - bail */ @@ -152,6 +132,71 @@ bool ArmMir2Lir::InexpensiveConstantInt(int32_t value) { return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0); } +bool ArmMir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) { + switch (opcode) { + case Instruction::ADD_INT: + case Instruction::ADD_INT_2ADDR: + case Instruction::SUB_INT: + case Instruction::SUB_INT_2ADDR: + if ((value >> 12) == (value >> 31)) { // Signed 12-bit, RRI12 versions of ADD/SUB. + return true; + } + FALLTHROUGH_INTENDED; + case Instruction::IF_EQ: + case Instruction::IF_NE: + case Instruction::IF_LT: + case Instruction::IF_GE: + case Instruction::IF_GT: + case Instruction::IF_LE: + return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(-value) >= 0); + case Instruction::SHL_INT: + case Instruction::SHL_INT_2ADDR: + case Instruction::SHR_INT: + case Instruction::SHR_INT_2ADDR: + case Instruction::USHR_INT: + case Instruction::USHR_INT_2ADDR: + return true; + case Instruction::CONST: + case Instruction::CONST_4: + case Instruction::CONST_16: + if ((value >> 16) == 0) { + return true; // movw, 16-bit unsigned. 
+ } + FALLTHROUGH_INTENDED; + case Instruction::AND_INT: + case Instruction::AND_INT_2ADDR: + case Instruction::AND_INT_LIT16: + case Instruction::AND_INT_LIT8: + case Instruction::OR_INT: + case Instruction::OR_INT_2ADDR: + case Instruction::OR_INT_LIT16: + case Instruction::OR_INT_LIT8: + return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0); + case Instruction::XOR_INT: + case Instruction::XOR_INT_2ADDR: + case Instruction::XOR_INT_LIT16: + case Instruction::XOR_INT_LIT8: + return (ModifiedImmediate(value) >= 0); + case Instruction::MUL_INT: + case Instruction::MUL_INT_2ADDR: + case Instruction::MUL_INT_LIT8: + case Instruction::MUL_INT_LIT16: + case Instruction::DIV_INT: + case Instruction::DIV_INT_2ADDR: + case Instruction::DIV_INT_LIT8: + case Instruction::DIV_INT_LIT16: + case Instruction::REM_INT: + case Instruction::REM_INT_2ADDR: + case Instruction::REM_INT_LIT8: + case Instruction::REM_INT_LIT16: { + EasyMultiplyOp ops[2]; + return GetEasyMultiplyTwoOps(value, ops); + } + default: + return false; + } +} + bool ArmMir2Lir::InexpensiveConstantFloat(int32_t value) { return EncodeImmSingle(value) >= 0; } @@ -510,7 +555,7 @@ LIR* ArmMir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, in op = (op == kOpAdd) ? kOpSub : kOpAdd; } } - if (mod_imm < 0 && (abs_value & 0x3ff) == abs_value) { + if (mod_imm < 0 && (abs_value >> 12) == 0) { // This is deliberately used only if modified immediate encoding is inadequate since // we sometimes actually use the flags for small values but not necessarily low regs. 
if (op == kOpAdd) @@ -542,6 +587,12 @@ LIR* ArmMir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, in case kOpOr: opcode = kThumb2OrrRRI8M; alt_opcode = kThumb2OrrRRR; + if (mod_imm < 0) { + mod_imm = ModifiedImmediate(~value); + if (mod_imm >= 0) { + opcode = kThumb2OrnRRI8M; + } + } break; case kOpAnd: if (mod_imm < 0) { @@ -855,12 +906,12 @@ LIR* ArmMir2Lir::LoadStoreUsingInsnWithOffsetImm8Shl2(ArmOpcode opcode, RegStora */ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size) { - LIR* load = NULL; - ArmOpcode opcode = kThumbBkpt; + LIR* load = nullptr; + ArmOpcode opcode16 = kThumbBkpt; // 16-bit Thumb opcode. + ArmOpcode opcode32 = kThumbBkpt; // 32-bit Thumb2 opcode. bool short_form = false; - bool thumb2Form = (displacement < 4092 && displacement >= 0); bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8(); - int encoded_disp = displacement; + int scale = 0; // Used for opcode16 and some indexed loads. 
bool already_generated = false; switch (size) { case kDouble: @@ -888,57 +939,45 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag already_generated = true; break; } + DCHECK_EQ((displacement & 0x3), 0); + scale = 2; if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) && (displacement >= 0)) { short_form = true; - encoded_disp >>= 2; - opcode = kThumbLdrPcRel; + opcode16 = kThumbLdrPcRel; } else if (r_dest.Low8() && (r_base == rs_rARM_SP) && (displacement <= 1020) && (displacement >= 0)) { short_form = true; - encoded_disp >>= 2; - opcode = kThumbLdrSpRel; - } else if (all_low && displacement < 128 && displacement >= 0) { - DCHECK_EQ((displacement & 0x3), 0); - short_form = true; - encoded_disp >>= 2; - opcode = kThumbLdrRRI5; - } else if (thumb2Form) { - short_form = true; - opcode = kThumb2LdrRRI12; + opcode16 = kThumbLdrSpRel; + } else { + short_form = all_low && (displacement >> (5 + scale)) == 0; + opcode16 = kThumbLdrRRI5; + opcode32 = kThumb2LdrRRI12; } break; case kUnsignedHalf: - if (all_low && displacement < 64 && displacement >= 0) { - DCHECK_EQ((displacement & 0x1), 0); - short_form = true; - encoded_disp >>= 1; - opcode = kThumbLdrhRRI5; - } else if (displacement < 4092 && displacement >= 0) { - short_form = true; - opcode = kThumb2LdrhRRI12; - } + DCHECK_EQ((displacement & 0x1), 0); + scale = 1; + short_form = all_low && (displacement >> (5 + scale)) == 0; + opcode16 = kThumbLdrhRRI5; + opcode32 = kThumb2LdrhRRI12; break; case kSignedHalf: - if (thumb2Form) { - short_form = true; - opcode = kThumb2LdrshRRI12; - } + DCHECK_EQ((displacement & 0x1), 0); + scale = 1; + DCHECK_EQ(opcode16, kThumbBkpt); // Not available. + opcode32 = kThumb2LdrshRRI12; break; case kUnsignedByte: - if (all_low && displacement < 32 && displacement >= 0) { - short_form = true; - opcode = kThumbLdrbRRI5; - } else if (thumb2Form) { - short_form = true; - opcode = kThumb2LdrbRRI12; - } + DCHECK_EQ(scale, 0); // Keep scale = 0. 
+ short_form = all_low && (displacement >> (5 + scale)) == 0; + opcode16 = kThumbLdrbRRI5; + opcode32 = kThumb2LdrbRRI12; break; case kSignedByte: - if (thumb2Form) { - short_form = true; - opcode = kThumb2LdrsbRRI12; - } + DCHECK_EQ(scale, 0); // Keep scale = 0. + DCHECK_EQ(opcode16, kThumbBkpt); // Not available. + opcode32 = kThumb2LdrsbRRI12; break; default: LOG(FATAL) << "Bad size: " << size; @@ -946,12 +985,33 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag if (!already_generated) { if (short_form) { - load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp); + load = NewLIR3(opcode16, r_dest.GetReg(), r_base.GetReg(), displacement >> scale); + } else if ((displacement >> 12) == 0) { // Thumb2 form. + load = NewLIR3(opcode32, r_dest.GetReg(), r_base.GetReg(), displacement); + } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) && + InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) { + // In this case, using LoadIndexed would emit 3 insns (movw+movt+ldr) but we can + // actually do it in two because we know that the kOpAdd is a single insn. On the + // other hand, we introduce an extra dependency, so this is not necessarily faster. + if (opcode16 != kThumbBkpt && r_dest.Low8() && + InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) { + // We can use the 16-bit Thumb opcode for the load. 
+ OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~(0x1f << scale)); + load = NewLIR3(opcode16, r_dest.GetReg(), r_dest.GetReg(), (displacement >> scale) & 0x1f); + } else { + DCHECK_NE(opcode32, kThumbBkpt); + OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~0x00000fff); + load = NewLIR3(opcode32, r_dest.GetReg(), r_dest.GetReg(), displacement & 0x00000fff); + } } else { + if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) || + (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) { + scale = 0; // Prefer unscaled indexing if the same number of insns. + } RegStorage reg_offset = AllocTemp(); - LoadConstant(reg_offset, encoded_disp); + LoadConstant(reg_offset, displacement >> scale); DCHECK(!r_dest.IsFloat()); - load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size); + load = LoadBaseIndexed(r_base, reg_offset, r_dest, scale, size); FreeTemp(reg_offset); } } @@ -997,12 +1057,12 @@ LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size) { - LIR* store = NULL; - ArmOpcode opcode = kThumbBkpt; + LIR* store = nullptr; + ArmOpcode opcode16 = kThumbBkpt; // 16-bit Thumb opcode. + ArmOpcode opcode32 = kThumbBkpt; // 32-bit Thumb2 opcode. bool short_form = false; - bool thumb2Form = (displacement < 4092 && displacement >= 0); bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8(); - int encoded_disp = displacement; + int scale = 0; // Used for opcode16 and some indexed loads. 
bool already_generated = false; switch (size) { case kDouble: @@ -1034,53 +1094,67 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora already_generated = true; break; } + DCHECK_EQ((displacement & 0x3), 0); + scale = 2; if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) { short_form = true; - encoded_disp >>= 2; - opcode = kThumbStrSpRel; - } else if (all_low && displacement < 128 && displacement >= 0) { - DCHECK_EQ((displacement & 0x3), 0); - short_form = true; - encoded_disp >>= 2; - opcode = kThumbStrRRI5; - } else if (thumb2Form) { - short_form = true; - opcode = kThumb2StrRRI12; + opcode16 = kThumbStrSpRel; + } else { + short_form = all_low && (displacement >> (5 + scale)) == 0; + opcode16 = kThumbStrRRI5; + opcode32 = kThumb2StrRRI12; } break; case kUnsignedHalf: case kSignedHalf: - if (all_low && displacement < 64 && displacement >= 0) { - DCHECK_EQ((displacement & 0x1), 0); - short_form = true; - encoded_disp >>= 1; - opcode = kThumbStrhRRI5; - } else if (thumb2Form) { - short_form = true; - opcode = kThumb2StrhRRI12; - } + DCHECK_EQ((displacement & 0x1), 0); + scale = 1; + short_form = all_low && (displacement >> (5 + scale)) == 0; + opcode16 = kThumbStrhRRI5; + opcode32 = kThumb2StrhRRI12; break; case kUnsignedByte: case kSignedByte: - if (all_low && displacement < 32 && displacement >= 0) { - short_form = true; - opcode = kThumbStrbRRI5; - } else if (thumb2Form) { - short_form = true; - opcode = kThumb2StrbRRI12; - } + DCHECK_EQ(scale, 0); // Keep scale = 0. 
+ short_form = all_low && (displacement >> (5 + scale)) == 0; + opcode16 = kThumbStrbRRI5; + opcode32 = kThumb2StrbRRI12; break; default: LOG(FATAL) << "Bad size: " << size; } if (!already_generated) { if (short_form) { - store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp); + store = NewLIR3(opcode16, r_src.GetReg(), r_base.GetReg(), displacement >> scale); + } else if ((displacement >> 12) == 0) { + store = NewLIR3(opcode32, r_src.GetReg(), r_base.GetReg(), displacement); + } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) && + InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) { + // In this case, using StoreIndexed would emit 3 insns (movw+movt+str) but we can + // actually do it in two because we know that the kOpAdd is a single insn. On the + // other hand, we introduce an extra dependency, so this is not necessarily faster. + RegStorage r_scratch = AllocTemp(); + if (opcode16 != kThumbBkpt && r_src.Low8() && r_scratch.Low8() && + InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) { + // We can use the 16-bit Thumb opcode for the load. + OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~(0x1f << scale)); + store = NewLIR3(opcode16, r_src.GetReg(), r_scratch.GetReg(), + (displacement >> scale) & 0x1f); + } else { + DCHECK_NE(opcode32, kThumbBkpt); + OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~0x00000fff); + store = NewLIR3(opcode32, r_src.GetReg(), r_scratch.GetReg(), displacement & 0x00000fff); + } + FreeTemp(r_scratch); } else { + if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) || + (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) { + scale = 0; // Prefer unscaled indexing if the same number of insns. 
+ } RegStorage r_scratch = AllocTemp(); - LoadConstant(r_scratch, encoded_disp); + LoadConstant(r_scratch, displacement >> scale); DCHECK(!r_src.IsFloat()); - store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size); + store = StoreBaseIndexed(r_base, r_scratch, r_src, scale, size); FreeTemp(r_scratch); } } diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index e1b4546e6c..cc61e93d82 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -538,9 +538,12 @@ void Mir2Lir::InstallSwitchTables() { bx_offset = tab_rec->anchor->offset + 4; break; case kX86: - case kX86_64: bx_offset = 0; break; + case kX86_64: + // RIP relative to switch table. + bx_offset = tab_rec->offset; + break; case kArm64: case kMips: bx_offset = tab_rec->anchor->offset; @@ -775,6 +778,10 @@ void Mir2Lir::CreateNativeGcMap() { ": " << PrettyMethod(cu_->method_idx, *cu_->dex_file); native_gc_map_builder.AddEntry(native_offset, references); } + + // Maybe not necessary, but this could help prevent errors where we access the verified method + // after it has been deleted. 
+ mir_graph_->GetCurrentDexCompilationUnit()->ClearVerifiedMethod(); } /* Determine the offset of each literal field */ diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 84d68d2b7b..ad2ed01ad5 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -553,7 +553,7 @@ std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs) { } static bool NeedsRex(int32_t raw_reg) { - return RegStorage::RegNum(raw_reg) > 7; + return raw_reg != kRIPReg && RegStorage::RegNum(raw_reg) > 7; } static uint8_t LowRegisterBits(int32_t raw_reg) { @@ -689,7 +689,13 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int entry->opcode != kX86Lea32RM && entry->opcode != kX86Lea64RM) { DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name; } - size += IS_SIMM8(displacement) ? 1 : 4; + if (raw_base == kRIPReg) { + DCHECK(cu_->target64) << + "Attempt to use a 64-bit RIP adressing with instruction " << entry->name; + size += 4; + } else { + size += IS_SIMM8(displacement) ? 1 : 4; + } } } size += entry->skeleton.immediate_bytes; @@ -1022,14 +1028,24 @@ void X86Mir2Lir::EmitModrmThread(uint8_t reg_or_opcode) { void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp) { DCHECK_LT(reg_or_opcode, 8); - DCHECK_LT(base, 8); - uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | base; - code_buffer_.push_back(modrm); - if (base == rs_rX86_SP_32.GetRegNum()) { - // Special SIB for SP base - code_buffer_.push_back(0 << 6 | rs_rX86_SP_32.GetRegNum() << 3 | rs_rX86_SP_32.GetRegNum()); + if (base == kRIPReg) { + // x86_64 RIP handling: always 32 bit displacement. 
+ uint8_t modrm = (0x0 << 6) | (reg_or_opcode << 3) | 0x5; + code_buffer_.push_back(modrm); + code_buffer_.push_back(disp & 0xFF); + code_buffer_.push_back((disp >> 8) & 0xFF); + code_buffer_.push_back((disp >> 16) & 0xFF); + code_buffer_.push_back((disp >> 24) & 0xFF); + } else { + DCHECK_LT(base, 8); + uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | base; + code_buffer_.push_back(modrm); + if (base == rs_rX86_SP_32.GetRegNum()) { + // Special SIB for SP base + code_buffer_.push_back(0 << 6 | rs_rX86_SP_32.GetRegNum() << 3 | rs_rX86_SP_32.GetRegNum()); + } + EmitDisp(base, disp); } - EmitDisp(base, disp); } void X86Mir2Lir::EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, @@ -1141,7 +1157,7 @@ void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32 CheckValidByteRegister(entry, raw_reg); EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base); uint8_t low_reg = LowRegisterBits(raw_reg); - uint8_t low_base = LowRegisterBits(raw_base); + uint8_t low_base = (raw_base == kRIPReg) ? raw_base : LowRegisterBits(raw_base); EmitModrmDisp(low_reg, low_base, disp); DCHECK_EQ(0, entry->skeleton.modrm_opcode); DCHECK_EQ(0, entry->skeleton.ax_opcode); @@ -1758,12 +1774,29 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { LIR *target_lir = lir->target; DCHECK(target_lir != NULL); CodeOffset target = target_lir->offset; - lir->operands[2] = target; - int newSize = GetInsnSize(lir); - if (newSize != lir->flags.size) { - lir->flags.size = newSize; - res = kRetryAll; + // Handle 64 bit RIP addressing. + if (lir->operands[1] == kRIPReg) { + // Offset is relative to next instruction. 
+ lir->operands[2] = target - (lir->offset + lir->flags.size); + } else { + lir->operands[2] = target; + int newSize = GetInsnSize(lir); + if (newSize != lir->flags.size) { + lir->flags.size = newSize; + res = kRetryAll; + } } + } else if (lir->flags.fixup == kFixupSwitchTable) { + DCHECK(cu_->target64); + DCHECK_EQ(lir->opcode, kX86Lea64RM) << "Unknown instruction: " << X86Mir2Lir::EncodingMap[lir->opcode].name; + DCHECK_EQ(lir->operands[1], static_cast<int>(kRIPReg)); + // Grab the target offset from the saved data. + Mir2Lir::EmbeddedData* tab_rec = + reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(lir->operands[4])); + CodeOffset target = tab_rec->offset; + // Handle 64 bit RIP addressing. + // Offset is relative to next instruction. + lir->operands[2] = target - (lir->offset + lir->flags.size); } break; } diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index be10d93a97..544ac3b815 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -142,25 +142,7 @@ void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocat // Get the switch value rl_src = LoadValue(rl_src, kCoreReg); - // NewLIR0(kX86Bkpt); - // Materialize a pointer to the switch table - RegStorage start_of_method_reg; - if (base_of_code_ != nullptr) { - // We can use the saved value. 
- RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - rl_method = LoadValueWide(rl_method, kCoreReg); - } else { - rl_method = LoadValue(rl_method, kCoreReg); - } - start_of_method_reg = rl_method.reg; - store_method_addr_used_ = true; - } else { - start_of_method_reg = AllocTempRef(); - NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg()); - } - DCHECK_EQ(start_of_method_reg.Is64Bit(), cu_->target64); int low_key = s4FromSwitchData(&table[2]); RegStorage keyReg; // Remove the bias, if necessary @@ -170,19 +152,49 @@ void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocat keyReg = AllocTemp(); OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key); } + // Bounds check - if < 0 or >= size continue following switch OpRegImm(kOpCmp, keyReg, size - 1); LIR* branch_over = OpCondBranch(kCondHi, NULL); - // Load the displacement from the switch table - RegStorage disp_reg = AllocTemp(); - NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(), - 2, WrapPointer(tab_rec)); - // Add displacement to start of method - OpRegReg(kOpAdd, start_of_method_reg, cu_->target64 ? As64BitReg(disp_reg) : disp_reg); + RegStorage addr_for_jump; + if (cu_->target64) { + RegStorage table_base = AllocTempWide(); + // Load the address of the table into table_base. + LIR* lea = RawLIR(current_dalvik_offset_, kX86Lea64RM, table_base.GetReg(), kRIPReg, + 256, 0, WrapPointer(tab_rec)); + lea->flags.fixup = kFixupSwitchTable; + AppendLIR(lea); + + // Load the offset from the table out of the table. + addr_for_jump = AllocTempWide(); + NewLIR5(kX86MovsxdRA, addr_for_jump.GetReg(), table_base.GetReg(), keyReg.GetReg(), 2, 0); + + // Add the offset from the table to the table base. + OpRegReg(kOpAdd, addr_for_jump, table_base); + } else { + // Materialize a pointer to the switch table. + RegStorage start_of_method_reg; + if (base_of_code_ != nullptr) { + // We can use the saved value. 
+ RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); + rl_method = LoadValue(rl_method, kCoreReg); + start_of_method_reg = rl_method.reg; + store_method_addr_used_ = true; + } else { + start_of_method_reg = AllocTempRef(); + NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg()); + } + // Load the displacement from the switch table. + addr_for_jump = AllocTemp(); + NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(), + 2, WrapPointer(tab_rec)); + // Add displacement to start of method. + OpRegReg(kOpAdd, addr_for_jump, start_of_method_reg); + } + // ..and go! - LIR* switch_branch = NewLIR1(kX86JmpR, start_of_method_reg.GetReg()); - tab_rec->anchor = switch_branch; + tab_rec->anchor = NewLIR1(kX86JmpR, addr_for_jump.GetReg()); /* branch_over target here */ LIR* target = NewLIR0(kPseudoTargetLabel); diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 80cdc83497..85ab92bc08 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -1289,6 +1289,18 @@ bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) { } LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { + if (cu_->target64) { + // We can do this directly using RIP addressing. + // We don't know the proper offset for the value, so pick one that will force + // 4 byte offset. We will fix this up in the assembler later to have the right + // value. 
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); + LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, 256); + res->target = target; + res->flags.fixup = kFixupLoad; + return res; + } + CHECK(base_of_code_ != nullptr); // Address the start of the method @@ -1309,7 +1321,6 @@ LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { 0, 0, target); res->target = target; res->flags.fixup = kFixupLoad; - store_method_addr_used_ = true; return res; } diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index e16a70b555..5f6cdda0d3 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -206,7 +206,7 @@ static const RegStorage RegStorage32FromSpecialTargetRegister_Target64[] { RegStorage::InvalidReg(), // kSelf - Thread pointer. RegStorage::InvalidReg(), // kSuspend - Used to reduce suspend checks for some targets. RegStorage::InvalidReg(), // kLr - no register as the return address is pushed on entry. - RegStorage::InvalidReg(), // kPc - TODO: RIP based addressing. + RegStorage(kRIPReg), // kPc rs_rX86_SP_32, // kSp rs_rDI, // kArg0 rs_rSI, // kArg1 @@ -662,6 +662,12 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() { xp_reg_info->SetIsTemp(true); } + // Special Handling for x86_64 RIP addressing. + if (cu_->target64) { + RegisterInfo* info = new (arena_) RegisterInfo(RegStorage(kRIPReg), kEncodeNone); + reginfo_map_[kRIPReg] = info; + } + // Alias single precision xmm to double xmms. // TODO: as needed, add larger vector sizes - alias all to the largest. for (RegisterInfo* info : reg_pool_->sp_regs_) { @@ -1609,9 +1615,6 @@ void X86Mir2Lir::GenConst128(MIR* mir) { } void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) { - // The literal pool needs position independent logic. - store_method_addr_used_ = true; - // To deal with correct memory ordering, reverse order of constants. 
int32_t constants[4]; constants[3] = mir->dalvikInsn.arg[0]; @@ -1625,20 +1628,28 @@ void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) { data_target = AddVectorLiteral(constants); } - // Address the start of the method. - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - rl_method = LoadValueWide(rl_method, kCoreReg); - } else { - rl_method = LoadValue(rl_method, kCoreReg); - } - // Load the proper value from the literal area. // We don't know the proper offset for the value, so pick one that will force - // 4 byte offset. We will fix this up in the assembler later to have the right - // value. + // 4 byte offset. We will fix this up in the assembler later to have the + // right value. + LIR* load; ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); - LIR *load = NewLIR3(opcode, reg, rl_method.reg.GetReg(), 256 /* bogus */); + if (cu_->target64) { + load = NewLIR3(opcode, reg, kRIPReg, 256 /* bogus */); + } else { + // Address the start of the method. + RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); + if (rl_method.wide) { + rl_method = LoadValueWide(rl_method, kCoreReg); + } else { + rl_method = LoadValue(rl_method, kCoreReg); + } + + load = NewLIR3(opcode, reg, rl_method.reg.GetReg(), 256 /* bogus */); + + // The literal pool needs position independent logic. 
+ store_method_addr_used_ = true; + } load->flags.fixup = kFixupLoad; load->target = data_target; } diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index ad3222cd84..3b58698619 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -570,32 +570,36 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { if (is_fp) { DCHECK(r_dest.IsDouble()); if (value == 0) { - return NewLIR2(kX86XorpsRR, low_reg_val, low_reg_val); - } else if (base_of_code_ != nullptr) { + return NewLIR2(kX86XorpdRR, low_reg_val, low_reg_val); + } else if (base_of_code_ != nullptr || cu_->target64) { // We will load the value from the literal area. LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi); if (data_target == NULL) { data_target = AddWideData(&literal_list_, val_lo, val_hi); } - // Address the start of the method - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - rl_method = LoadValueWide(rl_method, kCoreReg); - } else { - rl_method = LoadValue(rl_method, kCoreReg); - } - // Load the proper value from the literal area. - // We don't know the proper offset for the value, so pick one that will force - // 4 byte offset. We will fix this up in the assembler later to have the right - // value. + // We don't know the proper offset for the value, so pick one that + // will force 4 byte offset. We will fix this up in the assembler + // later to have the right value. ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); - res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val), - kDouble, kNotVolatile); + if (cu_->target64) { + res = NewLIR3(kX86MovsdRM, low_reg_val, kRIPReg, 256 /* bogus */); + } else { + // Address the start of the method. 
+ RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); + if (rl_method.wide) { + rl_method = LoadValueWide(rl_method, kCoreReg); + } else { + rl_method = LoadValue(rl_method, kCoreReg); + } + + res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val), + kDouble, kNotVolatile); + store_method_addr_used_ = true; + } res->target = data_target; res->flags.fixup = kFixupLoad; - store_method_addr_used_ = true; } else { if (r_dest.IsPair()) { if (val_lo == 0) { @@ -960,12 +964,14 @@ void X86Mir2Lir::AnalyzeMIR() { curr_bb = iter.Next(); } - // Did we need a pointer to the method code? + // Did we need a pointer to the method code? Not in 64 bit mode. + base_of_code_ = nullptr; + + // store_method_addr_ must be false for x86_64, since RIP addressing is used. + CHECK(!(cu_->target64 && store_method_addr_)); if (store_method_addr_) { - base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, cu_->target64 == true); + base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false); DCHECK(base_of_code_ != nullptr); - } else { - base_of_code_ = nullptr; } } @@ -994,19 +1000,22 @@ void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir) { AnalyzeFPInstruction(opcode, bb, mir); break; case kMirOpConstVector: - store_method_addr_ = true; + if (!cu_->target64) { + store_method_addr_ = true; + } break; case kMirOpPackedMultiply: case kMirOpPackedShiftLeft: case kMirOpPackedSignedShiftRight: - case kMirOpPackedUnsignedShiftRight: { - // Byte emulation requires constants from the literal pool. - OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16); - if (opsize == kSignedByte || opsize == kUnsignedByte) { - store_method_addr_ = true; + case kMirOpPackedUnsignedShiftRight: + if (!cu_->target64) { + // Byte emulation requires constants from the literal pool. 
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16); + if (opsize == kSignedByte || opsize == kUnsignedByte) { + store_method_addr_ = true; + } } break; - } default: // Ignore the rest. break; @@ -1016,6 +1025,7 @@ void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir) { void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) { // Looking for // - Do we need a pointer to the code (used for packed switches and double lits)? + // 64 bit uses RIP addressing instead. switch (opcode) { // Instructions referencing doubles. @@ -1038,7 +1048,9 @@ void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) { // Packed switches and array fills need a pointer to the base of the method. case Instruction::FILL_ARRAY_DATA: case Instruction::PACKED_SWITCH: - store_method_addr_ = true; + if (!cu_->target64) { + store_method_addr_ = true; + } break; case Instruction::INVOKE_STATIC: case Instruction::INVOKE_STATIC_RANGE: @@ -1115,7 +1127,8 @@ RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc) { void X86Mir2Lir::AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir) { UNUSED(opcode, bb); - // For now this is only actual for x86-32. + + // 64 bit RIP addressing doesn't need store_method_addr_ set. if (cu_->target64) { return; } diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 76a67c4d6c..3e0a8527a3 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -217,6 +217,9 @@ enum X86NativeRegisterPool { xr14 = RegStorage::k128BitSolo | 14, xr15 = RegStorage::k128BitSolo | 15, + // Special value for RIP 64 bit addressing. + kRIPReg = 255, + // TODO: as needed, add 256, 512 and 1024-bit xmm views. 
}; diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc index 4929b5b2b0..932a532e56 100644 --- a/compiler/dex/verification_results.cc +++ b/compiler/dex/verification_results.cc @@ -84,6 +84,15 @@ const VerifiedMethod* VerificationResults::GetVerifiedMethod(MethodReference ref return (it != verified_methods_.end()) ? it->second : nullptr; } +void VerificationResults::RemoveVerifiedMethod(MethodReference ref) { + WriterMutexLock mu(Thread::Current(), verified_methods_lock_); + auto it = verified_methods_.find(ref); + if (it != verified_methods_.end()) { + delete it->second; + verified_methods_.erase(it); + } +} + void VerificationResults::AddRejectedClass(ClassReference ref) { { WriterMutexLock mu(Thread::Current(), rejected_classes_lock_); diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h index 0e7923fbc3..7fc2a2363d 100644 --- a/compiler/dex/verification_results.h +++ b/compiler/dex/verification_results.h @@ -48,6 +48,7 @@ class VerificationResults { const VerifiedMethod* GetVerifiedMethod(MethodReference ref) LOCKS_EXCLUDED(verified_methods_lock_); + void RemoveVerifiedMethod(MethodReference ref) LOCKS_EXCLUDED(verified_methods_lock_); void AddRejectedClass(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_); bool IsClassRejected(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_); diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h index ebf7874dcf..3a91b08d5e 100644 --- a/compiler/driver/compiler_driver-inl.h +++ b/compiler/driver/compiler_driver-inl.h @@ -21,7 +21,6 @@ #include "dex/compiler_ir.h" #include "dex_compilation_unit.h" -#include "field_helper.h" #include "mirror/art_field-inl.h" #include "mirror/art_method-inl.h" #include "mirror/class_loader.h" @@ -134,10 +133,9 @@ inline std::pair<bool, bool> CompilerDriver::IsFastStaticField( } else { // Search dex file for localized ssb index, may fail if field's class is a parent // of 
the class mentioned in the dex file and there is no dex cache entry. - StackHandleScope<1> hs(Thread::Current()); + std::string temp; const DexFile::StringId* string_id = - dex_file->FindStringId( - FieldHelper(hs.NewHandle(resolved_field)).GetDeclaringClassDescriptor()); + dex_file->FindStringId(resolved_field->GetDeclaringClass()->GetDescriptor(&temp)); if (string_id != nullptr) { const DexFile::TypeId* type_id = dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id)); diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 2e9f8355f2..e4274712d4 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -19,9 +19,14 @@ #define ATRACE_TAG ATRACE_TAG_DALVIK #include <utils/Trace.h> +#include <unordered_set> #include <vector> #include <unistd.h> +#ifndef __APPLE__ +#include <malloc.h> // For mallinfo +#endif + #include "base/stl_util.h" #include "base/timing_logger.h" #include "class_linker.h" @@ -53,6 +58,7 @@ #include "ScopedLocalRef.h" #include "handle_scope-inl.h" #include "thread.h" +#include "thread_list.h" #include "thread_pool.h" #include "trampolines/trampoline_compiler.h" #include "transaction.h" @@ -495,6 +501,7 @@ void CompilerDriver::CompileAll(jobject class_loader, TimingLogger* timings) { DCHECK(!Runtime::Current()->IsStarted()); std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1)); + VLOG(compiler) << "Before precompile " << GetMemoryUsageString(); PreCompile(class_loader, dex_files, thread_pool.get(), timings); Compile(class_loader, dex_files, thread_pool.get(), timings); if (dump_stats_) { @@ -591,20 +598,25 @@ void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFi void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) { LoadImageClasses(timings); + VLOG(compiler) << "LoadImageClasses: " << 
GetMemoryUsageString(); Resolve(class_loader, dex_files, thread_pool, timings); + VLOG(compiler) << "Resolve: " << GetMemoryUsageString(); if (!compiler_options_->IsVerificationEnabled()) { - LOG(INFO) << "Verify none mode specified, skipping verification."; + VLOG(compiler) << "Verify none mode specified, skipping verification."; SetVerified(class_loader, dex_files, thread_pool, timings); return; } Verify(class_loader, dex_files, thread_pool, timings); + VLOG(compiler) << "Verify: " << GetMemoryUsageString(); InitializeClasses(class_loader, dex_files, thread_pool, timings); + VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString(); UpdateImageClasses(timings); + VLOG(compiler) << "UpdateImageClasses: " << GetMemoryUsageString(); } bool CompilerDriver::IsImageClass(const char* descriptor) const { @@ -626,10 +638,10 @@ bool CompilerDriver::IsClassToCompile(const char* descriptor) const { } } -static void ResolveExceptionsForMethod(MutableMethodHelper* mh, +static void ResolveExceptionsForMethod(MutableHandle<mirror::ArtMethod> method_handle, std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - const DexFile::CodeItem* code_item = mh->GetMethod()->GetCodeItem(); + const DexFile::CodeItem* code_item = method_handle->GetCodeItem(); if (code_item == nullptr) { return; // native or abstract method } @@ -649,10 +661,10 @@ static void ResolveExceptionsForMethod(MutableMethodHelper* mh, uint16_t encoded_catch_handler_handlers_type_idx = DecodeUnsignedLeb128(&encoded_catch_handler_list); // Add to set of types to resolve if not already in the dex cache resolved types - if (!mh->GetMethod()->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) { + if (!method_handle->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) { exceptions_to_resolve.insert( std::pair<uint16_t, const DexFile*>(encoded_catch_handler_handlers_type_idx, - mh->GetMethod()->GetDexFile())); + 
method_handle->GetDexFile())); } // ignore address associated with catch handler DecodeUnsignedLeb128(&encoded_catch_handler_list); @@ -669,14 +681,14 @@ static bool ResolveCatchBlockExceptionsClassVisitor(mirror::Class* c, void* arg) std::set<std::pair<uint16_t, const DexFile*>>* exceptions_to_resolve = reinterpret_cast<std::set<std::pair<uint16_t, const DexFile*>>*>(arg); StackHandleScope<1> hs(Thread::Current()); - MutableMethodHelper mh(hs.NewHandle<mirror::ArtMethod>(nullptr)); + MutableHandle<mirror::ArtMethod> method_handle(hs.NewHandle<mirror::ArtMethod>(nullptr)); for (size_t i = 0; i < c->NumVirtualMethods(); ++i) { - mh.ChangeMethod(c->GetVirtualMethod(i)); - ResolveExceptionsForMethod(&mh, *exceptions_to_resolve); + method_handle.Assign(c->GetVirtualMethod(i)); + ResolveExceptionsForMethod(method_handle, *exceptions_to_resolve); } for (size_t i = 0; i < c->NumDirectMethods(); ++i) { - mh.ChangeMethod(c->GetDirectMethod(i)); - ResolveExceptionsForMethod(&mh, *exceptions_to_resolve); + method_handle.Assign(c->GetDirectMethod(i)); + ResolveExceptionsForMethod(method_handle, *exceptions_to_resolve); } return true; } @@ -782,23 +794,143 @@ static void MaybeAddToImageClasses(Handle<mirror::Class> c, std::set<std::string } } -void CompilerDriver::FindClinitImageClassesCallback(mirror::Object* object, void* arg) { - DCHECK(object != nullptr); - DCHECK(arg != nullptr); - CompilerDriver* compiler_driver = reinterpret_cast<CompilerDriver*>(arg); - StackHandleScope<1> hs(Thread::Current()); - MaybeAddToImageClasses(hs.NewHandle(object->GetClass()), compiler_driver->image_classes_.get()); -} +// Keeps all the data for the update together. Also doubles as the reference visitor. +// Note: we can use object pointers because we suspend all threads. 
+class ClinitImageUpdate { + public: + static ClinitImageUpdate* Create(std::set<std::string>* image_class_descriptors, Thread* self, + ClassLinker* linker, std::string* error_msg) { + std::unique_ptr<ClinitImageUpdate> res(new ClinitImageUpdate(image_class_descriptors, self, + linker)); + if (res->art_method_class_ == nullptr) { + *error_msg = "Could not find ArtMethod class."; + return nullptr; + } else if (res->dex_cache_class_ == nullptr) { + *error_msg = "Could not find DexCache class."; + return nullptr; + } + + return res.release(); + } + + ~ClinitImageUpdate() { + // Allow others to suspend again. + self_->EndAssertNoThreadSuspension(old_cause_); + } + + // Visitor for VisitReferences. + void operator()(mirror::Object* object, MemberOffset field_offset, bool /* is_static */) const + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + mirror::Object* ref = object->GetFieldObject<mirror::Object>(field_offset); + if (ref != nullptr) { + VisitClinitClassesObject(ref); + } + } + + // java.lang.Reference visitor for VisitReferences. + void operator()(mirror::Class* /* klass */, mirror::Reference* /* ref */) const { + } + + void Walk() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + // Use the initial classes as roots for a search. + for (mirror::Class* klass_root : image_classes_) { + VisitClinitClassesObject(klass_root); + } + } + + private: + ClinitImageUpdate(std::set<std::string>* image_class_descriptors, Thread* self, + ClassLinker* linker) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) : + image_class_descriptors_(image_class_descriptors), self_(self) { + CHECK(linker != nullptr); + CHECK(image_class_descriptors != nullptr); + + // Make sure nobody interferes with us. + old_cause_ = self->StartAssertNoThreadSuspension("Boot image closure"); + + // Find the interesting classes. 
+ art_method_class_ = linker->LookupClass(self, "Ljava/lang/reflect/ArtMethod;", + ComputeModifiedUtf8Hash("Ljava/lang/reflect/ArtMethod;"), nullptr); + dex_cache_class_ = linker->LookupClass(self, "Ljava/lang/DexCache;", + ComputeModifiedUtf8Hash("Ljava/lang/DexCache;"), nullptr); + + // Find all the already-marked classes. + WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); + linker->VisitClasses(FindImageClasses, this); + } + + static bool FindImageClasses(mirror::Class* klass, void* arg) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + ClinitImageUpdate* data = reinterpret_cast<ClinitImageUpdate*>(arg); + std::string temp; + const char* name = klass->GetDescriptor(&temp); + if (data->image_class_descriptors_->find(name) != data->image_class_descriptors_->end()) { + data->image_classes_.push_back(klass); + } + + return true; + } + + void VisitClinitClassesObject(mirror::Object* object) const + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + DCHECK(object != nullptr); + if (marked_objects_.find(object) != marked_objects_.end()) { + // Already processed. + return; + } + + // Mark it. + marked_objects_.insert(object); + + if (object->IsClass()) { + // If it is a class, add it. + StackHandleScope<1> hs(self_); + MaybeAddToImageClasses(hs.NewHandle(object->AsClass()), image_class_descriptors_); + } else { + // Else visit the object's class. + VisitClinitClassesObject(object->GetClass()); + } + + // If it is not a dex cache or an ArtMethod, visit all references. 
+ mirror::Class* klass = object->GetClass(); + if (klass != art_method_class_ && klass != dex_cache_class_) { + object->VisitReferences<false /* visit class */>(*this, *this); + } + } + + mutable std::unordered_set<mirror::Object*> marked_objects_; + std::set<std::string>* const image_class_descriptors_; + std::vector<mirror::Class*> image_classes_; + const mirror::Class* art_method_class_; + const mirror::Class* dex_cache_class_; + Thread* const self_; + const char* old_cause_; + + DISALLOW_COPY_AND_ASSIGN(ClinitImageUpdate); +}; void CompilerDriver::UpdateImageClasses(TimingLogger* timings) { if (IsImage()) { TimingLogger::ScopedTiming t("UpdateImageClasses", timings); - // Update image_classes_ with classes for objects created by <clinit> methods. - gc::Heap* heap = Runtime::Current()->GetHeap(); - // TODO: Image spaces only? - ScopedObjectAccess soa(Thread::Current()); - WriterMutexLock mu(soa.Self(), *Locks::heap_bitmap_lock_); - heap->VisitObjects(FindClinitImageClassesCallback, this); + + Runtime* current = Runtime::Current(); + + // Suspend all threads. + current->GetThreadList()->SuspendAll(); + + std::string error_msg; + std::unique_ptr<ClinitImageUpdate> update(ClinitImageUpdate::Create(image_classes_.get(), + Thread::Current(), + current->GetClassLinker(), + &error_msg)); + CHECK(update.get() != nullptr) << error_msg; // TODO: Soft failure? + + // Do the marking. + update->Walk(); + + // Resume threads. + current->GetThreadList()->ResumeAll(); } } @@ -1194,11 +1326,10 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType target_method->dex_method_index = method->GetDexMethodIndex(); } else { if (no_guarantee_of_dex_cache_entry) { - StackHandleScope<1> hs(Thread::Current()); - MethodHelper mh(hs.NewHandle(method)); // See if the method is also declared in this dex cache. 
- uint32_t dex_method_idx = mh.FindDexMethodIndexInOtherDexFile( - *target_method->dex_file, target_method->dex_method_index); + uint32_t dex_method_idx = + method->FindDexMethodIndexInOtherDexFile(*target_method->dex_file, + target_method->dex_method_index); if (dex_method_idx != DexFile::kDexNoIndex) { target_method->dex_method_index = dex_method_idx; } else { @@ -1820,6 +1951,12 @@ static void InitializeClass(const ParallelCompilationManager* manager, size_t cl mirror::Throwable* exception = soa.Self()->GetException(&throw_location); VLOG(compiler) << "Initialization of " << descriptor << " aborted because of " << exception->Dump(); + std::ostream* file_log = manager->GetCompiler()-> + GetCompilerOptions().GetInitFailureOutput(); + if (file_log != nullptr) { + *file_log << descriptor << "\n"; + *file_log << exception->Dump() << "\n"; + } soa.Self()->ClearException(); transaction.Abort(); CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored"; @@ -1875,6 +2012,7 @@ void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFi CHECK(dex_file != nullptr); CompileDexFile(class_loader, *dex_file, dex_files, thread_pool, timings); } + VLOG(compiler) << "Compile: " << GetMemoryUsageString(); } void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, size_t class_def_index) { @@ -2001,6 +2139,7 @@ void CompilerDriver::CompileMethod(const DexFile::CodeItem* code_item, uint32_t bool compilation_enabled) { CompiledMethod* compiled_method = nullptr; uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0; + MethodReference method_ref(&dex_file, method_idx); if ((access_flags & kAccNative) != 0) { // Are we interpreting only and have support for generic JNI down calls? @@ -2014,7 +2153,6 @@ void CompilerDriver::CompileMethod(const DexFile::CodeItem* code_item, uint32_t } else if ((access_flags & kAccAbstract) != 0) { // Abstract methods don't have code. 
} else { - MethodReference method_ref(&dex_file, method_idx); bool compile = compilation_enabled && verification_results_->IsCandidateForCompilation(method_ref, access_flags); if (compile) { @@ -2051,16 +2189,18 @@ void CompilerDriver::CompileMethod(const DexFile::CodeItem* code_item, uint32_t // When compiling with PIC, there should be zero non-relative linker patches CHECK(!compile_pic || non_relative_linker_patch_count == 0u); - MethodReference ref(&dex_file, method_idx); - DCHECK(GetCompiledMethod(ref) == nullptr) << PrettyMethod(method_idx, dex_file); + DCHECK(GetCompiledMethod(method_ref) == nullptr) << PrettyMethod(method_idx, dex_file); { MutexLock mu(self, compiled_methods_lock_); - compiled_methods_.Put(ref, compiled_method); + compiled_methods_.Put(method_ref, compiled_method); non_relative_linker_patch_count_ += non_relative_linker_patch_count; } - DCHECK(GetCompiledMethod(ref) != nullptr) << PrettyMethod(method_idx, dex_file); + DCHECK(GetCompiledMethod(method_ref) != nullptr) << PrettyMethod(method_idx, dex_file); } + // Done compiling, delete the verified method to reduce native memory usage. 
+ verification_results_->RemoveVerifiedMethod(method_ref); + if (self->IsExceptionPending()) { ScopedObjectAccess soa(self); LOG(FATAL) << "Unexpected exception compiling: " << PrettyMethod(method_idx, dex_file) << "\n" @@ -2210,4 +2350,21 @@ bool CompilerDriver::SkipCompilation(const std::string& method_name) { } return !compile; } + +std::string CompilerDriver::GetMemoryUsageString() const { + std::ostringstream oss; + const ArenaPool* arena_pool = GetArenaPool(); + gc::Heap* heap = Runtime::Current()->GetHeap(); + oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated()); + oss << " java alloc=" << PrettySize(heap->GetBytesAllocated()); +#ifdef HAVE_MALLOC_H + struct mallinfo info = mallinfo(); + const size_t allocated_space = static_cast<size_t>(info.uordblks); + const size_t free_space = static_cast<size_t>(info.fordblks); + oss << " native alloc=" << PrettySize(allocated_space) << " free=" + << PrettySize(free_space); +#endif + return oss.str(); +} + } // namespace art diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 437a1a9c0e..615e0d0db4 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -39,6 +39,7 @@ #include "thread_pool.h" #include "utils/arena_allocator.h" #include "utils/dedupe_set.h" +#include "dex/verified_method.h" namespace art { @@ -340,6 +341,9 @@ class CompilerDriver { ArenaPool* GetArenaPool() { return &arena_pool_; } + const ArenaPool* GetArenaPool() const { + return &arena_pool_; + } bool WriteElf(const std::string& android_root, bool is_host, @@ -395,6 +399,9 @@ class CompilerDriver { // Should the compiler run on this method given profile information? bool SkipCompilation(const std::string& method_name); + // Get memory usage during compilation. + std::string GetMemoryUsageString() const; + private: // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics. 
// The only external contract is that unresolved method has flags 0 and resolved non-0. diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index 0592f0cf1e..aec7d241f5 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_DRIVER_COMPILER_OPTIONS_H_ #define ART_COMPILER_DRIVER_COMPILER_OPTIONS_H_ +#include <ostream> #include <string> #include <vector> @@ -70,7 +71,8 @@ class CompilerOptions FINAL { #ifdef ART_SEA_IR_MODE sea_ir_mode_(false), #endif - verbose_methods_(nullptr) { + verbose_methods_(nullptr), + init_failure_output_(nullptr) { } CompilerOptions(CompilerFilter compiler_filter, @@ -90,7 +92,8 @@ class CompilerOptions FINAL { #ifdef ART_SEA_IR_MODE bool sea_ir_mode, #endif - const std::vector<std::string>* verbose_methods + const std::vector<std::string>* verbose_methods, + std::ostream* init_failure_output ) : // NOLINT(whitespace/parens) compiler_filter_(compiler_filter), huge_method_threshold_(huge_method_threshold), @@ -109,7 +112,8 @@ class CompilerOptions FINAL { #ifdef ART_SEA_IR_MODE sea_ir_mode_(sea_ir_mode), #endif - verbose_methods_(verbose_methods) { + verbose_methods_(verbose_methods), + init_failure_output_(init_failure_output) { } CompilerFilter GetCompilerFilter() const { @@ -217,6 +221,10 @@ class CompilerOptions FINAL { return false; } + std::ostream* GetInitFailureOutput() const { + return init_failure_output_; + } + private: CompilerFilter compiler_filter_; const size_t huge_method_threshold_; @@ -241,6 +249,9 @@ class CompilerOptions FINAL { // Vector of methods to have verbose output enabled for. const std::vector<std::string>* const verbose_methods_; + // Log initialization of initialization failures to this stream if not null. 
+ std::ostream* const init_failure_output_; + DISALLOW_COPY_AND_ASSIGN(CompilerOptions); }; std::ostream& operator<<(std::ostream& os, const CompilerOptions::CompilerFilter& rhs); diff --git a/compiler/driver/dex_compilation_unit.h b/compiler/driver/dex_compilation_unit.h index 84f57991c3..03ae489da1 100644 --- a/compiler/driver/dex_compilation_unit.h +++ b/compiler/driver/dex_compilation_unit.h @@ -102,6 +102,10 @@ class DexCompilationUnit { return verified_method_; } + void ClearVerifiedMethod() { + verified_method_ = nullptr; + } + const std::string& GetSymbol(); private: @@ -117,7 +121,7 @@ class DexCompilationUnit { const uint16_t class_def_idx_; const uint32_t dex_method_idx_; const uint32_t access_flags_; - const VerifiedMethod* const verified_method_; + const VerifiedMethod* verified_method_; std::string symbol_; }; diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc index 2ffbd10eab..5488e2f6d0 100644 --- a/compiler/elf_writer_test.cc +++ b/compiler/elf_writer_test.cc @@ -46,7 +46,11 @@ class ElfWriterTest : public CommonCompilerTest { EXPECT_EQ(expected_value, ef->FindDynamicSymbolAddress(symbol_name)); \ } while (false) +#if defined(ART_USE_OPTIMIZING_COMPILER) +TEST_F(ElfWriterTest, DISABLED_dlsym) { +#else TEST_F(ElfWriterTest, dlsym) { +#endif std::string elf_location; if (IsHost()) { const char* host_dir = getenv("ANDROID_HOST_OUT"); diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 3b1d914f6e..eb1b5db958 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -77,6 +77,7 @@ bool ImageWriter::PrepareImageAddressSpace() { Thread::Current()->TransitionFromSuspendedToRunnable(); PruneNonImageClasses(); // Remove junk ComputeLazyFieldsForImageClasses(); // Add useful information + ProcessStrings(); Thread::Current()->TransitionFromRunnableToSuspended(kNative); } gc::Heap* heap = Runtime::Current()->GetHeap(); @@ -479,34 +480,29 @@ class StringCollector { }; // Compare strings based on length, used 
for sorting strings by length / reverse length. -class StringLengthComparator { +class LexicographicalStringComparator { public: - explicit StringLengthComparator(Handle<mirror::ObjectArray<mirror::String>> strings) - : strings_(strings) { + bool operator()(const mirror::HeapReference<mirror::String>& lhs, + const mirror::HeapReference<mirror::String>& rhs) const + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + mirror::String* lhs_s = lhs.AsMirrorPtr(); + mirror::String* rhs_s = rhs.AsMirrorPtr(); + uint16_t* lhs_begin = lhs_s->GetCharArray()->GetData() + lhs_s->GetOffset(); + uint16_t* rhs_begin = rhs_s->GetCharArray()->GetData() + rhs_s->GetOffset(); + return std::lexicographical_compare(lhs_begin, lhs_begin + lhs_s->GetLength(), + rhs_begin, rhs_begin + rhs_s->GetLength()); } - bool operator()(size_t a, size_t b) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return strings_->GetWithoutChecks(a)->GetLength() < strings_->GetWithoutChecks(b)->GetLength(); - } - - private: - Handle<mirror::ObjectArray<mirror::String>> strings_; }; -// Normal string < comparison through the chars_ array. 
-class SubstringComparator { - public: - explicit SubstringComparator(const std::vector<uint16_t>* const chars) : chars_(chars) { - } - bool operator()(const std::pair<size_t, size_t>& a, const std::pair<size_t, size_t>& b) { - return std::lexicographical_compare(chars_->begin() + a.first, - chars_->begin() + a.first + a.second, - chars_->begin() + b.first, - chars_->begin() + b.first + b.second); +static bool IsPrefix(mirror::String* pref, mirror::String* full) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (pref->GetLength() > full->GetLength()) { + return false; } - - private: - const std::vector<uint16_t>* const chars_; -}; + uint16_t* pref_begin = pref->GetCharArray()->GetData() + pref->GetOffset(); + uint16_t* full_begin = full->GetCharArray()->GetData() + full->GetOffset(); + return std::equal(pref_begin, pref_begin + pref->GetLength(), full_begin); +} void ImageWriter::ProcessStrings() { size_t total_strings = 0; @@ -528,67 +524,52 @@ void ImageWriter::ProcessStrings() { // Some strings could have gotten freed if AllocStringArray caused a GC. CHECK_LE(string_collector.GetIndex(), total_strings); total_strings = string_collector.GetIndex(); - size_t total_length = 0; - std::vector<size_t> reverse_sorted_strings; - for (size_t i = 0; i < total_strings; ++i) { - mirror::String* s = strings->GetWithoutChecks(i); - // Look up the string in the array. - total_length += s->GetLength(); - reverse_sorted_strings.push_back(i); - } - // Sort by reverse length. - StringLengthComparator comparator(strings); - std::sort(reverse_sorted_strings.rbegin(), reverse_sorted_strings.rend(), comparator); - // Deduplicate prefixes and add strings to the char array. 
- std::vector<uint16_t> combined_chars(total_length, 0U); - size_t num_chars = 0; + auto* strings_begin = reinterpret_cast<mirror::HeapReference<mirror::String>*>( + strings->GetRawData(sizeof(mirror::HeapReference<mirror::String>), 0)); + std::sort(strings_begin, strings_begin + total_strings, LexicographicalStringComparator()); // Characters of strings which are non equal prefix of another string (not the same string). // We don't count the savings from equal strings since these would get interned later anyways. size_t prefix_saved_chars = 0; - std::set<std::pair<size_t, size_t>, SubstringComparator> existing_strings(( - SubstringComparator(&combined_chars))); - for (size_t i = 0; i < total_strings; ++i) { - mirror::String* s = strings->GetWithoutChecks(reverse_sorted_strings[i]); - // Add the string to the end of the char array. + // Count characters needed for the strings. + size_t num_chars = 0u; + mirror::String* prev_s = nullptr; + for (size_t idx = 0; idx != total_strings; ++idx) { + mirror::String* s = strings->GetWithoutChecks(idx); size_t length = s->GetLength(); - for (size_t j = 0; j < length; ++j) { - combined_chars[num_chars++] = s->CharAt(j); - } - // Try to see if the string exists as a prefix of an existing string. - size_t new_offset = 0; - std::pair<size_t, size_t> new_string(num_chars - length, length); - auto it = existing_strings.lower_bound(new_string); - bool is_prefix = false; - if (it != existing_strings.end()) { - CHECK_LE(length, it->second); - is_prefix = std::equal(combined_chars.begin() + it->first, - combined_chars.begin() + it->first + it->second, - combined_chars.begin() + new_string.first); - } - if (is_prefix) { - // Shares a prefix, set the offset to where the new offset will be. - new_offset = it->first; - // Remove the added chars. 
- num_chars -= length; - if (it->second != length) { - prefix_saved_chars += length; + num_chars += length; + if (prev_s != nullptr && IsPrefix(prev_s, s)) { + size_t prev_length = prev_s->GetLength(); + num_chars -= prev_length; + if (prev_length != length) { + prefix_saved_chars += prev_length; } - } else { - new_offset = new_string.first; - existing_strings.insert(new_string); } - s->SetOffset(new_offset); - } - // Allocate and update the char arrays. - auto* array = mirror::CharArray::Alloc(self, num_chars); - for (size_t i = 0; i < num_chars; ++i) { - array->SetWithoutChecks<false>(i, combined_chars[i]); - } - for (size_t i = 0; i < total_strings; ++i) { - strings->GetWithoutChecks(i)->SetArray(array); + prev_s = s; + } + // Create character array, copy characters and point the strings there. + mirror::CharArray* array = mirror::CharArray::Alloc(self, num_chars); + string_data_array_ = array; + uint16_t* array_data = array->GetData(); + size_t pos = 0u; + prev_s = nullptr; + for (size_t idx = 0; idx != total_strings; ++idx) { + mirror::String* s = strings->GetWithoutChecks(idx); + uint16_t* s_data = s->GetCharArray()->GetData() + s->GetOffset(); + int32_t s_length = s->GetLength(); + int32_t prefix_length = 0u; + if (idx != 0u && IsPrefix(prev_s, s)) { + prefix_length = prev_s->GetLength(); + } + memcpy(array_data + pos, s_data + prefix_length, (s_length - prefix_length) * sizeof(*s_data)); + s->SetOffset(pos - prefix_length); + s->SetArray(array); + pos += s_length - prefix_length; + prev_s = s; } + CHECK_EQ(pos, num_chars); + LOG(INFO) << "Total # image strings=" << total_strings << " combined length=" - << total_length << " prefix saved chars=" << prefix_saved_chars; + << num_chars << " prefix saved chars=" << prefix_saved_chars; ComputeEagerResolvedStrings(); } @@ -1254,4 +1235,13 @@ uint32_t ImageWriter::BinSlot::GetIndex() const { return lockword_ & ~kBinMask; } +void ImageWriter::FreeStringDataArray() { + if (string_data_array_ != nullptr) { + 
gc::space::LargeObjectSpace* los = Runtime::Current()->GetHeap()->GetLargeObjectsSpace(); + if (los != nullptr) { + los->Free(Thread::Current(), reinterpret_cast<mirror::Object*>(string_data_array_)); + } + } +} + } // namespace art diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 8c84b686fa..4418879e73 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -18,6 +18,7 @@ #define ART_COMPILER_IMAGE_WRITER_H_ #include <stdint.h> +#include <valgrind.h> #include <cstddef> #include <memory> @@ -56,7 +57,15 @@ class ImageWriter FINAL { CHECK_NE(image_begin, 0U); } - ~ImageWriter() {} + ~ImageWriter() { + // For interned strings a large array is allocated to hold all the character data and avoid + // overhead. However, no GC is run anymore at this point. As the array is likely large, it + // will be allocated in the large object space, where valgrind can track every single + // allocation. Not explicitly freeing that array will be recognized as a leak. + if (RUNNING_ON_VALGRIND != 0) { + FreeStringDataArray(); + } + } bool PrepareImageAddressSpace(); @@ -254,6 +263,9 @@ class ImageWriter FINAL { // Calculate the sum total of the bin slot sizes in [0, up_to). Defaults to all bins. size_t GetBinSizeSum(Bin up_to = kBinSize) const; + // Release the string_data_array_. + void FreeStringDataArray(); + const CompilerDriver& compiler_driver_; // Beginning target image address for the output image. @@ -306,6 +318,8 @@ class ImageWriter FINAL { size_t bin_slot_sizes_[kBinSize]; // Number of bytes in a bin size_t bin_slot_count_[kBinSize]; // Number of objects in a bin + void* string_data_array_; // The backing for the interned strings. 
+ friend class FixupVisitor; friend class FixupClassVisitor; DISALLOW_COPY_AND_ASSIGN(ImageWriter); diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc new file mode 100644 index 0000000000..c353d66f1e --- /dev/null +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -0,0 +1,691 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "bounds_check_elimination.h" +#include "nodes.h" +#include "utils/arena_containers.h" + +namespace art { + +class MonotonicValueRange; + +/** + * A value bound is represented as a pair of value and constant, + * e.g. array.length - 1. 
+ */ +class ValueBound : public ValueObject { + public: + static ValueBound Create(HInstruction* instruction, int constant) { + if (instruction == nullptr) { + return ValueBound(nullptr, constant); + } + if (instruction->IsIntConstant()) { + return ValueBound(nullptr, instruction->AsIntConstant()->GetValue() + constant); + } + return ValueBound(instruction, constant); + } + + HInstruction* GetInstruction() const { return instruction_; } + int GetConstant() const { return constant_; } + + bool IsRelativeToArrayLength() const { + return instruction_ != nullptr && instruction_->IsArrayLength(); + } + + bool IsConstant() const { + return instruction_ == nullptr; + } + + static ValueBound Min() { return ValueBound(nullptr, INT_MIN); } + static ValueBound Max() { return ValueBound(nullptr, INT_MAX); } + + bool Equals(ValueBound bound) const { + return instruction_ == bound.instruction_ && constant_ == bound.constant_; + } + + // Returns if it's certain bound1 >= bound2. + bool GreaterThanOrEqual(ValueBound bound) const { + if (instruction_ == bound.instruction_) { + if (instruction_ == nullptr) { + // Pure constant. + return constant_ >= bound.constant_; + } + // There might be overflow/underflow. Be conservative for now. + return false; + } + // Not comparable. Just return false. + return false; + } + + // Returns if it's certain bound1 <= bound2. + bool LessThanOrEqual(ValueBound bound) const { + if (instruction_ == bound.instruction_) { + if (instruction_ == nullptr) { + // Pure constant. + return constant_ <= bound.constant_; + } + if (IsRelativeToArrayLength()) { + // Array length is guaranteed to be no less than 0. + // No overflow/underflow can happen if both constants are negative. + if (constant_ <= 0 && bound.constant_ <= 0) { + return constant_ <= bound.constant_; + } + // There might be overflow/underflow. Be conservative for now. + return false; + } + } + + // In case the array length is some constant, we can + // still compare. 
+ if (IsConstant() && bound.IsRelativeToArrayLength()) { + HInstruction* array = bound.GetInstruction()->AsArrayLength()->InputAt(0); + if (array->IsNullCheck()) { + array = array->AsNullCheck()->InputAt(0); + } + if (array->IsNewArray()) { + HInstruction* len = array->InputAt(0); + if (len->IsIntConstant()) { + int len_const = len->AsIntConstant()->GetValue(); + return constant_ <= len_const + bound.GetConstant(); + } + } + } + + // Not comparable. Just return false. + return false; + } + + // Try to narrow lower bound. Returns the greatest of the two if possible. + // Pick one if they are not comparable. + static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) { + if (bound1.instruction_ == bound2.instruction_) { + // Same instruction, compare the constant part. + return ValueBound(bound1.instruction_, + std::max(bound1.constant_, bound2.constant_)); + } + + // Not comparable. Just pick one. We may lose some info, but that's ok. + // Favor constant as lower bound. + return bound1.IsConstant() ? bound1 : bound2; + } + + // Try to narrow upper bound. Returns the lowest of the two if possible. + // Pick one if they are not comparable. + static ValueBound NarrowUpperBound(ValueBound bound1, ValueBound bound2) { + if (bound1.instruction_ == bound2.instruction_) { + // Same instruction, compare the constant part. + return ValueBound(bound1.instruction_, + std::min(bound1.constant_, bound2.constant_)); + } + + // Not comparable. Just pick one. We may lose some info, but that's ok. + // Favor array length as upper bound. + return bound1.IsRelativeToArrayLength() ? bound1 : bound2; + } + + // Add a constant to a ValueBound. If the constant part of the ValueBound + // overflows/underflows, then we can't accurately represent it. For correctness, + // just return Max/Min() depending on whether the returned ValueBound is used for + // lower/upper bound. 
+ ValueBound Add(int c, bool for_lower_bound, bool* overflow_or_underflow) const { + *overflow_or_underflow = false; + if (c == 0) { + return *this; + } + + int new_constant; + if (c > 0) { + if (constant_ > INT_MAX - c) { + // Constant part overflows. + *overflow_or_underflow = true; + return for_lower_bound ? Min() : Max(); + } else { + new_constant = constant_ + c; + } + } else { + if (constant_ < INT_MIN - c) { + // Constant part underflows. + *overflow_or_underflow = true; + return for_lower_bound ? Min() : Max(); + } else { + new_constant = constant_ + c; + } + } + return ValueBound(instruction_, new_constant); + } + + private: + ValueBound(HInstruction* instruction, int constant) + : instruction_(instruction), constant_(constant) {} + + HInstruction* instruction_; + int constant_; +}; + +/** + * Represent a range of lower bound and upper bound, both being inclusive. + * Currently a ValueRange may be generated as a result of the following: + * comparisons related to array bounds, array bounds check, add/sub on top + * of an existing value range, or a loop phi corresponding to an + * incrementing/decrementing array index (MonotonicValueRange). + */ +class ValueRange : public ArenaObject<kArenaAllocMisc> { + public: + ValueRange(ArenaAllocator* allocator, ValueBound lower, ValueBound upper) + : allocator_(allocator), lower_(lower), upper_(upper) {} + + virtual ~ValueRange() {} + + virtual const MonotonicValueRange* AsMonotonicValueRange() const { return nullptr; } + bool IsMonotonicValueRange() const { + return AsMonotonicValueRange() != nullptr; + } + + ArenaAllocator* GetAllocator() const { return allocator_; } + ValueBound GetLower() const { return lower_; } + ValueBound GetUpper() const { return upper_; } + + // If it's certain that this value range fits in other_range. 
+ virtual bool FitsIn(ValueRange* other_range) const { + if (other_range == nullptr) { + return true; + } + DCHECK(!other_range->IsMonotonicValueRange()); + return lower_.GreaterThanOrEqual(other_range->lower_) && + upper_.LessThanOrEqual(other_range->upper_); + } + + // Returns the intersection of this and range. + // If it's not possible to do intersection because some + // bounds are not comparable, it's ok to pick either bound. + virtual ValueRange* Narrow(ValueRange* range) { + if (range == nullptr) { + return this; + } + + if (range->IsMonotonicValueRange()) { + return this; + } + + return new (allocator_) ValueRange( + allocator_, + ValueBound::NarrowLowerBound(lower_, range->lower_), + ValueBound::NarrowUpperBound(upper_, range->upper_)); + } + + // Shift a range by a constant. If either bound can't be represented + // as (instruction+c) format due to possible overflow/underflow, + // return the full integer range. + ValueRange* Add(int constant) const { + bool overflow_or_underflow; + ValueBound lower = lower_.Add(constant, true, &overflow_or_underflow); + if (overflow_or_underflow) { + // We can't accurately represent the bounds anymore. + return FullIntRange(); + } + ValueBound upper = upper_.Add(constant, false, &overflow_or_underflow); + if (overflow_or_underflow) { + // We can't accurately represent the bounds anymore. + return FullIntRange(); + } + return new (allocator_) ValueRange(allocator_, lower, upper); + } + + // Return [INT_MIN, INT_MAX]. + ValueRange* FullIntRange() const { + return new (allocator_) ValueRange(allocator_, ValueBound::Min(), ValueBound::Max()); + } + + private: + ArenaAllocator* const allocator_; + const ValueBound lower_; // inclusive + const ValueBound upper_; // inclusive + + DISALLOW_COPY_AND_ASSIGN(ValueRange); +}; + +/** + * A monotonically incrementing/decrementing value range, e.g. + * the variable i in "for (int i=0; i<array.length; i++)". 
+ * Special care needs to be taken to account for overflow/underflow + * of such value ranges. + */ +class MonotonicValueRange : public ValueRange { + public: + static MonotonicValueRange* Create(ArenaAllocator* allocator, + HInstruction* initial, int increment) { + DCHECK_NE(increment, 0); + // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's + // used as a regular value range, due to possible overflow/underflow. + return new (allocator) MonotonicValueRange( + allocator, ValueBound::Min(), ValueBound::Max(), initial, increment); + } + + virtual ~MonotonicValueRange() {} + + const MonotonicValueRange* AsMonotonicValueRange() const OVERRIDE { return this; } + + // If it's certain that this value range fits in other_range. + bool FitsIn(ValueRange* other_range) const OVERRIDE { + if (other_range == nullptr) { + return true; + } + DCHECK(!other_range->IsMonotonicValueRange()); + return false; + } + + // Try to narrow this MonotonicValueRange given another range. + // Ideally it will return a normal ValueRange. But due to + // possible overflow/underflow, that may not be possible. + ValueRange* Narrow(ValueRange* range) OVERRIDE { + if (range == nullptr) { + return this; + } + DCHECK(!range->IsMonotonicValueRange()); + + if (increment_ > 0) { + // Monotonically increasing. + ValueBound lower = ValueBound::NarrowLowerBound( + ValueBound::Create(initial_, 0), range->GetLower()); + + // We currently conservatively assume max array length is INT_MAX. If we can + // make assumptions about the max array length, e.g. due to the max heap size, + // divided by the element size (such as 4 bytes for each integer array), we can + // lower this number and rule out some possible overflows. 
+ int max_array_len = INT_MAX; + + int upper = INT_MAX; + if (range->GetUpper().IsConstant()) { + upper = range->GetUpper().GetConstant(); + } else if (range->GetUpper().IsRelativeToArrayLength()) { + int constant = range->GetUpper().GetConstant(); + if (constant <= 0) { + // Normal case. e.g. <= array.length - 1, <= array.length - 2, etc. + upper = max_array_len + constant; + } else { + // There might be overflow. Give up narrowing. + return this; + } + } else { + // There might be overflow. Give up narrowing. + return this; + } + + // If we can prove for the last number in sequence of initial_, + // initial_ + increment_, initial_ + 2 x increment_, ... + // that's <= upper, (last_num_in_sequence + increment_) doesn't trigger overflow, + // then this MonotonicValueRange is narrowed to a normal value range. + + // Be conservative first, assume last number in the sequence hits upper. + int last_num_in_sequence = upper; + if (initial_->IsIntConstant()) { + int initial_constant = initial_->AsIntConstant()->GetValue(); + if (upper <= initial_constant) { + last_num_in_sequence = upper; + } else { + // Cast to int64_t for the subtraction part to avoid int overflow. + last_num_in_sequence = initial_constant + + ((int64_t)upper - (int64_t)initial_constant) / increment_ * increment_; + } + } + if (last_num_in_sequence <= INT_MAX - increment_) { + // No overflow. The sequence will be stopped by the upper bound test as expected. + return new (GetAllocator()) ValueRange(GetAllocator(), lower, range->GetUpper()); + } + + // There might be overflow. Give up narrowing. + return this; + } else { + DCHECK_NE(increment_, 0); + // Monotonically decreasing. + ValueBound upper = ValueBound::NarrowUpperBound( + ValueBound::Create(initial_, 0), range->GetUpper()); + + // Need to take care of underflow. Try to prove underflow won't happen + // for common cases. Basically need to be able to prove for any value + // that's >= range->GetLower(), it won't be positive with value+increment. 
+ if (range->GetLower().IsConstant()) { + int constant = range->GetLower().GetConstant(); + if (constant >= INT_MIN - increment_) { + return new (GetAllocator()) ValueRange(GetAllocator(), range->GetLower(), upper); + } + } + + // There might be underflow. Give up narrowing. + return this; + } + } + + private: + MonotonicValueRange(ArenaAllocator* allocator, ValueBound lower, + ValueBound upper, HInstruction* initial, int increment) + : ValueRange(allocator, lower, upper), + initial_(initial), + increment_(increment) {} + + HInstruction* const initial_; + const int increment_; + + DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange); +}; + +class BCEVisitor : public HGraphVisitor { + public: + explicit BCEVisitor(HGraph* graph) + : HGraphVisitor(graph), + maps_(graph->GetBlocks().Size()) {} + + private: + // Return the map of proven value ranges at the beginning of a basic block. + ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) { + int block_id = basic_block->GetBlockId(); + if (maps_.at(block_id) == nullptr) { + std::unique_ptr<ArenaSafeMap<int, ValueRange*>> map( + new ArenaSafeMap<int, ValueRange*>( + std::less<int>(), GetGraph()->GetArena()->Adapter())); + maps_.at(block_id) = std::move(map); + } + return maps_.at(block_id).get(); + } + + // Traverse up the dominator tree to look for value range info. + ValueRange* LookupValueRange(HInstruction* instruction, HBasicBlock* basic_block) { + while (basic_block != nullptr) { + ArenaSafeMap<int, ValueRange*>* map = GetValueRangeMap(basic_block); + if (map->find(instruction->GetId()) != map->end()) { + return map->Get(instruction->GetId()); + } + basic_block = basic_block->GetDominator(); + } + // Didn't find any. + return nullptr; + } + + // Try to detect useful value bound format from an instruction, e.g. + // a constant or array length related value. 
+ ValueBound DetectValueBoundFromValue(HInstruction* instruction) { + if (instruction->IsIntConstant()) { + return ValueBound::Create(nullptr, instruction->AsIntConstant()->GetValue()); + } + + if (instruction->IsArrayLength()) { + return ValueBound::Create(instruction, 0); + } + // Try to detect (array.length + c) format. + if (instruction->IsAdd()) { + HAdd* add = instruction->AsAdd(); + HInstruction* left = add->GetLeft(); + HInstruction* right = add->GetRight(); + if (left->IsArrayLength() && right->IsIntConstant()) { + return ValueBound::Create(left, right->AsIntConstant()->GetValue()); + } + } + + // No useful bound detected. + return ValueBound::Max(); + } + + // Narrow the value range of 'instruction' at the end of 'basic_block' with 'range', + // and push the narrowed value range to 'successor'. + void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block, + HBasicBlock* successor, ValueRange* range) { + ValueRange* existing_range = LookupValueRange(instruction, basic_block); + ValueRange* narrowed_range = (existing_range == nullptr) ? + range : existing_range->Narrow(range); + if (narrowed_range != nullptr) { + GetValueRangeMap(successor)->Overwrite(instruction->GetId(), narrowed_range); + } + } + + // Handle "if (left cmp_cond right)". + void HandleIf(HIf* instruction, HInstruction* left, HInstruction* right, IfCondition cond) { + HBasicBlock* block = instruction->GetBlock(); + + HBasicBlock* true_successor = instruction->IfTrueSuccessor(); + // There should be no critical edge at this point. + DCHECK_EQ(true_successor->GetPredecessors().Size(), 1u); + + HBasicBlock* false_successor = instruction->IfFalseSuccessor(); + // There should be no critical edge at this point. 
+ DCHECK_EQ(false_successor->GetPredecessors().Size(), 1u); + + ValueBound bound = DetectValueBoundFromValue(right); + bool found = !bound.Equals(ValueBound::Max()); + + ValueBound lower = bound; + ValueBound upper = bound; + if (!found) { + // No constant or array.length+c bound found. + // For i<j, we can still use j's upper bound as i's upper bound. Same for lower. + ValueRange* range = LookupValueRange(right, block); + if (range != nullptr) { + lower = range->GetLower(); + upper = range->GetUpper(); + } else { + lower = ValueBound::Min(); + upper = ValueBound::Max(); + } + } + + bool overflow_or_underflow; + if (cond == kCondLT || cond == kCondLE) { + if (!upper.Equals(ValueBound::Max())) { + int compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive + ValueBound new_upper = upper.Add(compensation, false, &overflow_or_underflow); + // overflow_or_underflow is ignored here since we already use ValueBound::Min() + // for lower bound. + ValueRange* new_range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper); + ApplyRangeFromComparison(left, block, true_successor, new_range); + } + + // array.length as a lower bound isn't considered useful. + if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) { + int compensation = (cond == kCondLE) ? 1 : 0; // lower bound is inclusive + ValueBound new_lower = lower.Add(compensation, true, &overflow_or_underflow); + // overflow_or_underflow is ignored here since we already use ValueBound::Max() + // for upper bound. + ValueRange* new_range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max()); + ApplyRangeFromComparison(left, block, false_successor, new_range); + } + } else if (cond == kCondGT || cond == kCondGE) { + // array.length as a lower bound isn't considered useful. + if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) { + int compensation = (cond == kCondGT) ? 
1 : 0; // lower bound is inclusive + ValueBound new_lower = lower.Add(compensation, true, &overflow_or_underflow); + // overflow_or_underflow is ignored here since we already use ValueBound::Max() + // for upper bound. + ValueRange* new_range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max()); + ApplyRangeFromComparison(left, block, true_successor, new_range); + } + + if (!upper.Equals(ValueBound::Max())) { + int compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive + ValueBound new_upper = upper.Add(compensation, false, &overflow_or_underflow); + // overflow_or_underflow is ignored here since we already use ValueBound::Min() + // for lower bound. + ValueRange* new_range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper); + ApplyRangeFromComparison(left, block, false_successor, new_range); + } + } + } + + void VisitBoundsCheck(HBoundsCheck* bounds_check) { + HBasicBlock* block = bounds_check->GetBlock(); + HInstruction* index = bounds_check->InputAt(0); + HInstruction* array_length = bounds_check->InputAt(1); + ValueRange* index_range = LookupValueRange(index, block); + + if (index_range != nullptr) { + ValueBound lower = ValueBound::Create(nullptr, 0); // constant 0 + ValueBound upper = ValueBound::Create(array_length, -1); // array_length - 1 + ValueRange* array_range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, upper); + if (index_range->FitsIn(array_range)) { + ReplaceBoundsCheck(bounds_check, index); + return; + } + } + + if (index->IsIntConstant()) { + ValueRange* array_length_range = LookupValueRange(array_length, block); + int constant = index->AsIntConstant()->GetValue(); + if (array_length_range != nullptr && + array_length_range->GetLower().IsConstant()) { + if (constant < array_length_range->GetLower().GetConstant()) { + ReplaceBoundsCheck(bounds_check, index); + return; + } + } + + // Once we have an array access 
like 'array[5] = 1', we record array.length >= 6. + ValueBound lower = ValueBound::Create(nullptr, constant + 1); + ValueBound upper = ValueBound::Max(); + ValueRange* range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, upper); + ValueRange* existing_range = LookupValueRange(array_length, block); + ValueRange* new_range = range; + if (existing_range != nullptr) { + new_range = range->Narrow(existing_range); + } + GetValueRangeMap(block)->Overwrite(array_length->GetId(), new_range); + } + } + + void ReplaceBoundsCheck(HInstruction* bounds_check, HInstruction* index) { + bounds_check->ReplaceWith(index); + bounds_check->GetBlock()->RemoveInstruction(bounds_check); + } + + void VisitPhi(HPhi* phi) { + if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) { + DCHECK_EQ(phi->InputCount(), 2U); + HInstruction* instruction = phi->InputAt(1); + if (instruction->IsAdd()) { + HAdd* add = instruction->AsAdd(); + HInstruction* left = add->GetLeft(); + HInstruction* right = add->GetRight(); + if (left == phi && right->IsIntConstant()) { + HInstruction* initial_value = phi->InputAt(0); + ValueRange* range = nullptr; + if (right->AsIntConstant()->GetValue() == 0) { + // Add constant 0. It's really a fixed value. + range = new (GetGraph()->GetArena()) ValueRange( + GetGraph()->GetArena(), + ValueBound::Create(initial_value, 0), + ValueBound::Create(initial_value, 0)); + } else { + // Monotonically increasing/decreasing. 
+ range = MonotonicValueRange::Create( + GetGraph()->GetArena(), + initial_value, + right->AsIntConstant()->GetValue()); + } + GetValueRangeMap(phi->GetBlock())->Overwrite(phi->GetId(), range); + } + } + } + } + + void VisitIf(HIf* instruction) { + if (instruction->InputAt(0)->IsCondition()) { + HCondition* cond = instruction->InputAt(0)->AsCondition(); + IfCondition cmp = cond->GetCondition(); + if (cmp == kCondGT || cmp == kCondGE || + cmp == kCondLT || cmp == kCondLE) { + HInstruction* left = cond->GetLeft(); + HInstruction* right = cond->GetRight(); + HandleIf(instruction, left, right, cmp); + } + } + } + + void VisitAdd(HAdd* add) { + HInstruction* right = add->GetRight(); + if (right->IsIntConstant()) { + ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock()); + if (left_range == nullptr) { + return; + } + ValueRange* range = left_range->Add(right->AsIntConstant()->GetValue()); + if (range != nullptr) { + GetValueRangeMap(add->GetBlock())->Overwrite(add->GetId(), range); + } + } + } + + void VisitSub(HSub* sub) { + HInstruction* left = sub->GetLeft(); + HInstruction* right = sub->GetRight(); + if (right->IsIntConstant()) { + ValueRange* left_range = LookupValueRange(left, sub->GetBlock()); + if (left_range == nullptr) { + return; + } + ValueRange* range = left_range->Add(-right->AsIntConstant()->GetValue()); + if (range != nullptr) { + GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range); + return; + } + } + + // Here we are interested in the typical triangular case of nested loops, + // such as the inner loop 'for (int j=0; j<array.length-i; j++)' where i + // is the index for outer loop. In this case, we know j is bounded by array.length-1. 
+ if (left->IsArrayLength()) { + HInstruction* array_length = left->AsArrayLength(); + ValueRange* right_range = LookupValueRange(right, sub->GetBlock()); + if (right_range != nullptr) { + ValueBound lower = right_range->GetLower(); + ValueBound upper = right_range->GetUpper(); + if (lower.IsConstant() && upper.IsRelativeToArrayLength()) { + HInstruction* upper_inst = upper.GetInstruction(); + if (upper_inst->IsArrayLength() && + upper_inst->AsArrayLength() == array_length) { + // (array.length - v) where v is in [c1, array.length + c2] + // gets [-c2, array.length - c1] as its value range. + ValueRange* range = new (GetGraph()->GetArena()) ValueRange( + GetGraph()->GetArena(), + ValueBound::Create(nullptr, - upper.GetConstant()), + ValueBound::Create(array_length, - lower.GetConstant())); + GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range); + } + } + } + } + } + + std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_; + + DISALLOW_COPY_AND_ASSIGN(BCEVisitor); +}; + +void BoundsCheckElimination::Run() { + BCEVisitor visitor(graph_); + // Reverse post order guarantees a node's dominators are visited first. + // We want to visit in the dominator-based order since if a value is known to + // be bounded by a range at one instruction, it must be true that all uses of + // that value dominated by that instruction fits in that range. Range of that + // value can be narrowed further down in the dominator tree. + // + // TODO: only visit blocks that dominate some array accesses. 
+ visitor.VisitReversePostOrder(); +} + +} // namespace art diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h new file mode 100644 index 0000000000..05cb185ed7 --- /dev/null +++ b/compiler/optimizing/bounds_check_elimination.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_ +#define ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_ + +#include "optimization.h" + +namespace art { + +class BoundsCheckElimination : public HOptimization { + public: + explicit BoundsCheckElimination(HGraph* graph) : HOptimization(graph, true, "BCE") {} + + void Run() OVERRIDE; + + private: + DISALLOW_COPY_AND_ASSIGN(BoundsCheckElimination); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_ diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc new file mode 100644 index 0000000000..f650ff21df --- /dev/null +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -0,0 +1,1045 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "bounds_check_elimination.h" +#include "builder.h" +#include "gvn.h" +#include "nodes.h" +#include "optimizing_unit_test.h" +#include "utils/arena_allocator.h" + +#include "gtest/gtest.h" + +namespace art { + +// if (i < 0) { array[i] = 1; // Can't eliminate. } +// else if (i >= array.length) { array[i] = 1; // Can't eliminate. } +// else { array[i] = 1; // Can eliminate. } +TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* parameter1 = new (&allocator) + HParameterValue(0, Primitive::kPrimNot); // array + HInstruction* parameter2 = new (&allocator) + HParameterValue(0, Primitive::kPrimInt); // i + HInstruction* constant_1 = new (&allocator) HIntConstant(1); + HInstruction* constant_0 = new (&allocator) HIntConstant(0); + entry->AddInstruction(parameter1); + entry->AddInstruction(parameter2); + entry->AddInstruction(constant_1); + entry->AddInstruction(constant_0); + + HBasicBlock* block1 = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block1); + HInstruction* cmp = new (&allocator) HGreaterThanOrEqual(parameter2, constant_0); + HIf* if_inst = new (&allocator) HIf(cmp); + block1->AddInstruction(cmp); + block1->AddInstruction(if_inst); + entry->AddSuccessor(block1); + + HBasicBlock* block2 = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block2); + 
HNullCheck* null_check = new (&allocator) HNullCheck(parameter1, 0); + HArrayLength* array_length = new (&allocator) HArrayLength(null_check); + HBoundsCheck* bounds_check2 = new (&allocator) + HBoundsCheck(parameter2, array_length, 0); + HArraySet* array_set = new (&allocator) HArraySet( + null_check, bounds_check2, constant_1, Primitive::kPrimInt, 0); + block2->AddInstruction(null_check); + block2->AddInstruction(array_length); + block2->AddInstruction(bounds_check2); + block2->AddInstruction(array_set); + + HBasicBlock* block3 = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block3); + null_check = new (&allocator) HNullCheck(parameter1, 0); + array_length = new (&allocator) HArrayLength(null_check); + cmp = new (&allocator) HLessThan(parameter2, array_length); + if_inst = new (&allocator) HIf(cmp); + block3->AddInstruction(null_check); + block3->AddInstruction(array_length); + block3->AddInstruction(cmp); + block3->AddInstruction(if_inst); + + HBasicBlock* block4 = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block4); + null_check = new (&allocator) HNullCheck(parameter1, 0); + array_length = new (&allocator) HArrayLength(null_check); + HBoundsCheck* bounds_check4 = new (&allocator) + HBoundsCheck(parameter2, array_length, 0); + array_set = new (&allocator) HArraySet( + null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0); + block4->AddInstruction(null_check); + block4->AddInstruction(array_length); + block4->AddInstruction(bounds_check4); + block4->AddInstruction(array_set); + + HBasicBlock* block5 = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block5); + null_check = new (&allocator) HNullCheck(parameter1, 0); + array_length = new (&allocator) HArrayLength(null_check); + HBoundsCheck* bounds_check5 = new (&allocator) + HBoundsCheck(parameter2, array_length, 0); + array_set = new (&allocator) HArraySet( + null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0); + block5->AddInstruction(null_check); + 
block5->AddInstruction(array_length); + block5->AddInstruction(bounds_check5); + block5->AddInstruction(array_set); + + HBasicBlock* exit = new (&allocator) HBasicBlock(graph); + graph->AddBlock(exit); + block2->AddSuccessor(exit); + block4->AddSuccessor(exit); + block5->AddSuccessor(exit); + exit->AddInstruction(new (&allocator) HExit()); + + block1->AddSuccessor(block3); // True successor + block1->AddSuccessor(block2); // False successor + + block3->AddSuccessor(block5); // True successor + block3->AddSuccessor(block4); // False successor + + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination(graph); + bounds_check_elimination.Run(); + ASSERT_FALSE(IsRemoved(bounds_check2)); + ASSERT_FALSE(IsRemoved(bounds_check4)); + ASSERT_TRUE(IsRemoved(bounds_check5)); +} + +// if (i > 0) { +// // Positive number plus MAX_INT will overflow and be negative. +// int j = i + Integer.MAX_VALUE; +// if (j < array.length) array[j] = 1; // Can't eliminate. 
+// } +TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* parameter1 = new (&allocator) + HParameterValue(0, Primitive::kPrimNot); // array + HInstruction* parameter2 = new (&allocator) + HParameterValue(0, Primitive::kPrimInt); // i + HInstruction* constant_1 = new (&allocator) HIntConstant(1); + HInstruction* constant_0 = new (&allocator) HIntConstant(0); + HInstruction* constant_max_int = new (&allocator) HIntConstant(INT_MAX); + entry->AddInstruction(parameter1); + entry->AddInstruction(parameter2); + entry->AddInstruction(constant_1); + entry->AddInstruction(constant_0); + entry->AddInstruction(constant_max_int); + + HBasicBlock* block1 = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block1); + HInstruction* cmp = new (&allocator) HLessThanOrEqual(parameter2, constant_0); + HIf* if_inst = new (&allocator) HIf(cmp); + block1->AddInstruction(cmp); + block1->AddInstruction(if_inst); + entry->AddSuccessor(block1); + + HBasicBlock* block2 = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block2); + HInstruction* add = new (&allocator) HAdd(Primitive::kPrimInt, parameter2, constant_max_int); + HNullCheck* null_check = new (&allocator) HNullCheck(parameter1, 0); + HArrayLength* array_length = new (&allocator) HArrayLength(null_check); + HInstruction* cmp2 = new (&allocator) HGreaterThanOrEqual(add, array_length); + if_inst = new (&allocator) HIf(cmp2); + block2->AddInstruction(add); + block2->AddInstruction(null_check); + block2->AddInstruction(array_length); + block2->AddInstruction(cmp2); + block2->AddInstruction(if_inst); + + HBasicBlock* block3 = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block3); + HBoundsCheck* bounds_check = new (&allocator) + HBoundsCheck(add, array_length, 
0); + HArraySet* array_set = new (&allocator) HArraySet( + null_check, bounds_check, constant_1, Primitive::kPrimInt, 0); + block3->AddInstruction(bounds_check); + block3->AddInstruction(array_set); + + HBasicBlock* exit = new (&allocator) HBasicBlock(graph); + graph->AddBlock(exit); + exit->AddInstruction(new (&allocator) HExit()); + block1->AddSuccessor(exit); // true successor + block1->AddSuccessor(block2); // false successor + block2->AddSuccessor(exit); // true successor + block2->AddSuccessor(block3); // false successor + block3->AddSuccessor(exit); + + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination(graph); + bounds_check_elimination.Run(); + ASSERT_FALSE(IsRemoved(bounds_check)); +} + +// if (i < array.length) { +// int j = i - Integer.MAX_VALUE; +// j = j - Integer.MAX_VALUE; // j is (i+2) after subtracting MAX_INT twice +// if (j > 0) array[j] = 1; // Can't eliminate. +// } +TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* parameter1 = new (&allocator) + HParameterValue(0, Primitive::kPrimNot); // array + HInstruction* parameter2 = new (&allocator) + HParameterValue(0, Primitive::kPrimInt); // i + HInstruction* constant_1 = new (&allocator) HIntConstant(1); + HInstruction* constant_0 = new (&allocator) HIntConstant(0); + HInstruction* constant_max_int = new (&allocator) HIntConstant(INT_MAX); + entry->AddInstruction(parameter1); + entry->AddInstruction(parameter2); + entry->AddInstruction(constant_1); + entry->AddInstruction(constant_0); + entry->AddInstruction(constant_max_int); + + HBasicBlock* block1 = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block1); + HNullCheck* null_check = new (&allocator)
HNullCheck(parameter1, 0); + HArrayLength* array_length = new (&allocator) HArrayLength(null_check); + HInstruction* cmp = new (&allocator) HGreaterThanOrEqual(parameter2, array_length); + HIf* if_inst = new (&allocator) HIf(cmp); + block1->AddInstruction(null_check); + block1->AddInstruction(array_length); + block1->AddInstruction(cmp); + block1->AddInstruction(if_inst); + entry->AddSuccessor(block1); + + HBasicBlock* block2 = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block2); + HInstruction* sub1 = new (&allocator) HSub(Primitive::kPrimInt, parameter2, constant_max_int); + HInstruction* sub2 = new (&allocator) HSub(Primitive::kPrimInt, sub1, constant_max_int); + HInstruction* cmp2 = new (&allocator) HLessThanOrEqual(sub2, constant_0); + if_inst = new (&allocator) HIf(cmp2); + block2->AddInstruction(sub1); + block2->AddInstruction(sub2); + block2->AddInstruction(cmp2); + block2->AddInstruction(if_inst); + + HBasicBlock* block3 = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block3); + HBoundsCheck* bounds_check = new (&allocator) + HBoundsCheck(sub2, array_length, 0); + HArraySet* array_set = new (&allocator) HArraySet( + null_check, bounds_check, constant_1, Primitive::kPrimInt, 0); + block3->AddInstruction(bounds_check); + block3->AddInstruction(array_set); + + HBasicBlock* exit = new (&allocator) HBasicBlock(graph); + graph->AddBlock(exit); + exit->AddInstruction(new (&allocator) HExit()); + block1->AddSuccessor(exit); // true successor + block1->AddSuccessor(block2); // false successor + block2->AddSuccessor(exit); // true successor + block2->AddSuccessor(block3); // false successor + block3->AddSuccessor(exit); + + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination(graph); + bounds_check_elimination.Run(); + ASSERT_FALSE(IsRemoved(bounds_check)); +} + +// array[5] = 1; // Can't eliminate. +// array[4] = 1; // Can eliminate. +// array[6] = 1; // Can't eliminate. 
+TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot); + HInstruction* constant_5 = new (&allocator) HIntConstant(5); + HInstruction* constant_4 = new (&allocator) HIntConstant(4); + HInstruction* constant_6 = new (&allocator) HIntConstant(6); + HInstruction* constant_1 = new (&allocator) HIntConstant(1); + entry->AddInstruction(parameter); + entry->AddInstruction(constant_5); + entry->AddInstruction(constant_4); + entry->AddInstruction(constant_6); + entry->AddInstruction(constant_1); + + HBasicBlock* block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block); + entry->AddSuccessor(block); + + HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0); + HArrayLength* array_length = new (&allocator) HArrayLength(null_check); + HBoundsCheck* bounds_check5 = new (&allocator) + HBoundsCheck(constant_5, array_length, 0); + HInstruction* array_set = new (&allocator) HArraySet( + null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0); + block->AddInstruction(null_check); + block->AddInstruction(array_length); + block->AddInstruction(bounds_check5); + block->AddInstruction(array_set); + + null_check = new (&allocator) HNullCheck(parameter, 0); + array_length = new (&allocator) HArrayLength(null_check); + HBoundsCheck* bounds_check4 = new (&allocator) + HBoundsCheck(constant_4, array_length, 0); + array_set = new (&allocator) HArraySet( + null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0); + block->AddInstruction(null_check); + block->AddInstruction(array_length); + block->AddInstruction(bounds_check4); + block->AddInstruction(array_set); + + null_check = new (&allocator) HNullCheck(parameter, 0); + array_length 
= new (&allocator) HArrayLength(null_check); + HBoundsCheck* bounds_check6 = new (&allocator) + HBoundsCheck(constant_6, array_length, 0); + array_set = new (&allocator) HArraySet( + null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0); + block->AddInstruction(null_check); + block->AddInstruction(array_length); + block->AddInstruction(bounds_check6); + block->AddInstruction(array_set); + + block->AddInstruction(new (&allocator) HGoto()); + + HBasicBlock* exit = new (&allocator) HBasicBlock(graph); + graph->AddBlock(exit); + block->AddSuccessor(exit); + exit->AddInstruction(new (&allocator) HExit()); + + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination(graph); + bounds_check_elimination.Run(); + ASSERT_FALSE(IsRemoved(bounds_check5)); + ASSERT_TRUE(IsRemoved(bounds_check4)); + ASSERT_FALSE(IsRemoved(bounds_check6)); +} + +// for (int i=initial; i<array.length; i+=increment) { array[i] = 10; } +static HGraph* BuildSSAGraph1(ArenaAllocator* allocator, + HInstruction** bounds_check, + int initial, + int increment, + IfCondition cond = kCondGE) { + HGraph* graph = new (allocator) HGraph(allocator); + + HBasicBlock* entry = new (allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot); + HInstruction* constant_initial = new (allocator) HIntConstant(initial); + HInstruction* constant_increment = new (allocator) HIntConstant(increment); + HInstruction* constant_10 = new (allocator) HIntConstant(10); + entry->AddInstruction(parameter); + entry->AddInstruction(constant_initial); + entry->AddInstruction(constant_increment); + entry->AddInstruction(constant_10); + + HBasicBlock* block = new (allocator) HBasicBlock(graph); + graph->AddBlock(block); + entry->AddSuccessor(block); + block->AddInstruction(new (allocator) HGoto()); + + HBasicBlock* loop_header = new (allocator) 
HBasicBlock(graph); + HBasicBlock* loop_body = new (allocator) HBasicBlock(graph); + HBasicBlock* exit = new (allocator) HBasicBlock(graph); + + graph->AddBlock(loop_header); + graph->AddBlock(loop_body); + graph->AddBlock(exit); + block->AddSuccessor(loop_header); + loop_header->AddSuccessor(exit); // true successor + loop_header->AddSuccessor(loop_body); // false successor + loop_body->AddSuccessor(loop_header); + + HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); + phi->AddInput(constant_initial); + HInstruction* null_check = new (allocator) HNullCheck(parameter, 0); + HInstruction* array_length = new (allocator) HArrayLength(null_check); + HInstruction* cmp = nullptr; + if (cond == kCondGE) { + cmp = new (allocator) HGreaterThanOrEqual(phi, array_length); + } else { + DCHECK(cond == kCondGT); + cmp = new (allocator) HGreaterThan(phi, array_length); + } + HInstruction* if_inst = new (allocator) HIf(cmp); + loop_header->AddPhi(phi); + loop_header->AddInstruction(null_check); + loop_header->AddInstruction(array_length); + loop_header->AddInstruction(cmp); + loop_header->AddInstruction(if_inst); + + null_check = new (allocator) HNullCheck(parameter, 0); + array_length = new (allocator) HArrayLength(null_check); + *bounds_check = new (allocator) HBoundsCheck(phi, array_length, 0); + HInstruction* array_set = new (allocator) HArraySet( + null_check, *bounds_check, constant_10, Primitive::kPrimInt, 0); + + HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_increment); + loop_body->AddInstruction(null_check); + loop_body->AddInstruction(array_length); + loop_body->AddInstruction(*bounds_check); + loop_body->AddInstruction(array_set); + loop_body->AddInstruction(add); + loop_body->AddInstruction(new (allocator) HGoto()); + phi->AddInput(add); + + exit->AddInstruction(new (allocator) HExit()); + + return graph; +} + +TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { + ArenaPool pool; + ArenaAllocator 
allocator(&pool); + + // for (int i=0; i<array.length; i++) { array[i] = 10; // Can eliminate with gvn. } + HInstruction* bounds_check = nullptr; + HGraph* graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); + graph->BuildDominatorTree(); + BoundsCheckElimination bounds_check_elimination(graph); + bounds_check_elimination.Run(); + ASSERT_FALSE(IsRemoved(bounds_check)); + + // This time add gvn. Need gvn to eliminate the second + // HArrayLength which uses the null check as its input. + graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_after_gvn(graph); + bounds_check_elimination_after_gvn.Run(); + ASSERT_TRUE(IsRemoved(bounds_check)); + + // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. } + graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); + bounds_check_elimination_with_initial_1.Run(); + ASSERT_TRUE(IsRemoved(bounds_check)); + + // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. } + graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); + bounds_check_elimination_with_initial_minus_1.Run(); + ASSERT_FALSE(IsRemoved(bounds_check)); + + // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. 
} + graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); + bounds_check_elimination_with_greater_than.Run(); + ASSERT_FALSE(IsRemoved(bounds_check)); + + // for (int i=0; i<array.length; i += 2) { + // array[i] = 10; // Can't eliminate due to overflow concern. } + graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_with_increment_2(graph); + bounds_check_elimination_with_increment_2.Run(); + ASSERT_FALSE(IsRemoved(bounds_check)); + + // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. } + graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph); + bounds_check_elimination_with_increment_2_from_1.Run(); + ASSERT_TRUE(IsRemoved(bounds_check)); +} + +// for (int i=array.length; i>0; i+=increment) { array[i-1] = 10; } +static HGraph* BuildSSAGraph2(ArenaAllocator* allocator, + HInstruction** bounds_check, + int initial, + int increment = -1, + IfCondition cond = kCondLE) { + HGraph* graph = new (allocator) HGraph(allocator); + + HBasicBlock* entry = new (allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot); + HInstruction* constant_initial = new (allocator) HIntConstant(initial); + HInstruction* constant_increment = new (allocator) HIntConstant(increment); + HInstruction* constant_minus_1 = new (allocator) HIntConstant(-1); + HInstruction* constant_10 = new (allocator) HIntConstant(10); + entry->AddInstruction(parameter); + entry->AddInstruction(constant_initial); + 
entry->AddInstruction(constant_increment); + entry->AddInstruction(constant_minus_1); + entry->AddInstruction(constant_10); + + HBasicBlock* block = new (allocator) HBasicBlock(graph); + graph->AddBlock(block); + entry->AddSuccessor(block); + HInstruction* null_check = new (allocator) HNullCheck(parameter, 0); + HInstruction* array_length = new (allocator) HArrayLength(null_check); + block->AddInstruction(null_check); + block->AddInstruction(array_length); + block->AddInstruction(new (allocator) HGoto()); + + HBasicBlock* loop_header = new (allocator) HBasicBlock(graph); + HBasicBlock* loop_body = new (allocator) HBasicBlock(graph); + HBasicBlock* exit = new (allocator) HBasicBlock(graph); + + graph->AddBlock(loop_header); + graph->AddBlock(loop_body); + graph->AddBlock(exit); + block->AddSuccessor(loop_header); + loop_header->AddSuccessor(exit); // true successor + loop_header->AddSuccessor(loop_body); // false successor + loop_body->AddSuccessor(loop_header); + + HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); + phi->AddInput(array_length); + HInstruction* cmp = nullptr; + if (cond == kCondLE) { + cmp = new (allocator) HLessThanOrEqual(phi, constant_initial); + } else { + DCHECK(cond == kCondLT); + cmp = new (allocator) HLessThan(phi, constant_initial); + } + HInstruction* if_inst = new (allocator) HIf(cmp); + loop_header->AddPhi(phi); + loop_header->AddInstruction(cmp); + loop_header->AddInstruction(if_inst); + + HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_minus_1); + null_check = new (allocator) HNullCheck(parameter, 0); + array_length = new (allocator) HArrayLength(null_check); + *bounds_check = new (allocator) HBoundsCheck(add, array_length, 0); + HInstruction* array_set = new (allocator) HArraySet( + null_check, *bounds_check, constant_10, Primitive::kPrimInt, 0); + HInstruction* add_phi = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_increment); + loop_body->AddInstruction(add); + 
loop_body->AddInstruction(null_check); + loop_body->AddInstruction(array_length); + loop_body->AddInstruction(*bounds_check); + loop_body->AddInstruction(array_set); + loop_body->AddInstruction(add_phi); + loop_body->AddInstruction(new (allocator) HGoto()); + phi->AddInput(add); + + exit->AddInstruction(new (allocator) HExit()); + + return graph; +} + +TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + // for (int i=array.length; i>0; i--) { array[i-1] = 10; // Can eliminate with gvn. } + HInstruction* bounds_check = nullptr; + HGraph* graph = BuildSSAGraph2(&allocator, &bounds_check, 0); + graph->BuildDominatorTree(); + BoundsCheckElimination bounds_check_elimination(graph); + bounds_check_elimination.Run(); + ASSERT_FALSE(IsRemoved(bounds_check)); + + // This time add gvn. Need gvn to eliminate the second + // HArrayLength which uses the null check as its input. + graph = BuildSSAGraph2(&allocator, &bounds_check, 0); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_after_gvn(graph); + bounds_check_elimination_after_gvn.Run(); + ASSERT_TRUE(IsRemoved(bounds_check)); + + // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. } + graph = BuildSSAGraph2(&allocator, &bounds_check, 1); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); + bounds_check_elimination_with_initial_1.Run(); + ASSERT_TRUE(IsRemoved(bounds_check)); + + // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. 
} + graph = BuildSSAGraph2(&allocator, &bounds_check, -1); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); + bounds_check_elimination_with_initial_minus_1.Run(); + ASSERT_FALSE(IsRemoved(bounds_check)); + + // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. } + graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_with_less_than(graph); + bounds_check_elimination_with_less_than.Run(); + ASSERT_FALSE(IsRemoved(bounds_check)); + + // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. } + graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph); + bounds_check_elimination_increment_minus_2.Run(); + ASSERT_TRUE(IsRemoved(bounds_check)); +} + +// int[] array = new array[10]; +// for (int i=0; i<10; i+=increment) { array[i] = 10; } +static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, + HInstruction** bounds_check, + int initial, + int increment, + IfCondition cond) { + HGraph* graph = new (allocator) HGraph(allocator); + + HBasicBlock* entry = new (allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* constant_10 = new (allocator) HIntConstant(10); + HInstruction* constant_initial = new (allocator) HIntConstant(initial); + HInstruction* constant_increment = new (allocator) HIntConstant(increment); + entry->AddInstruction(constant_10); + entry->AddInstruction(constant_initial); + entry->AddInstruction(constant_increment); + + HBasicBlock* block = new (allocator) HBasicBlock(graph); + graph->AddBlock(block); + entry->AddSuccessor(block); + HInstruction* new_array = new 
(allocator) + HNewArray(constant_10, 0, Primitive::kPrimInt); + block->AddInstruction(new_array); + block->AddInstruction(new (allocator) HGoto()); + + HBasicBlock* loop_header = new (allocator) HBasicBlock(graph); + HBasicBlock* loop_body = new (allocator) HBasicBlock(graph); + HBasicBlock* exit = new (allocator) HBasicBlock(graph); + + graph->AddBlock(loop_header); + graph->AddBlock(loop_body); + graph->AddBlock(exit); + block->AddSuccessor(loop_header); + loop_header->AddSuccessor(exit); // true successor + loop_header->AddSuccessor(loop_body); // false successor + loop_body->AddSuccessor(loop_header); + + HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); + phi->AddInput(constant_initial); + HInstruction* cmp = nullptr; + if (cond == kCondGE) { + cmp = new (allocator) HGreaterThanOrEqual(phi, constant_10); + } else { + DCHECK(cond == kCondGT); + cmp = new (allocator) HGreaterThan(phi, constant_10); + } + HInstruction* if_inst = new (allocator) HIf(cmp); + loop_header->AddPhi(phi); + loop_header->AddInstruction(cmp); + loop_header->AddInstruction(if_inst); + + HNullCheck* null_check = new (allocator) HNullCheck(new_array, 0); + HArrayLength* array_length = new (allocator) HArrayLength(null_check); + *bounds_check = new (allocator) HBoundsCheck(phi, array_length, 0); + HInstruction* array_set = new (allocator) HArraySet( + null_check, *bounds_check, constant_10, Primitive::kPrimInt, 0); + HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_increment); + loop_body->AddInstruction(null_check); + loop_body->AddInstruction(array_length); + loop_body->AddInstruction(*bounds_check); + loop_body->AddInstruction(array_set); + loop_body->AddInstruction(add); + loop_body->AddInstruction(new (allocator) HGoto()); + phi->AddInput(add); + + exit->AddInstruction(new (allocator) HExit()); + + return graph; +} + +TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + // 
int[] array = new array[10]; + // for (int i=0; i<10; i++) { array[i] = 10; // Can eliminate. } + HInstruction* bounds_check = nullptr; + HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_after_gvn(graph); + bounds_check_elimination_after_gvn.Run(); + ASSERT_TRUE(IsRemoved(bounds_check)); + + // int[] array = new array[10]; + // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. } + graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); + bounds_check_elimination_with_initial_1.Run(); + ASSERT_TRUE(IsRemoved(bounds_check)); + + // int[] array = new array[10]; + // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. } + graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); + bounds_check_elimination_with_greater_than.Run(); + ASSERT_FALSE(IsRemoved(bounds_check)); + + // int[] array = new array[10]; + // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. 
} + graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_increment_8(graph); + bounds_check_elimination_increment_8.Run(); + ASSERT_TRUE(IsRemoved(bounds_check)); +} + +// for (int i=initial; i<array.length; i++) { array[array.length-i-1] = 10; } +static HGraph* BuildSSAGraph4(ArenaAllocator* allocator, + HInstruction** bounds_check, + int initial, + IfCondition cond = kCondGE) { + HGraph* graph = new (allocator) HGraph(allocator); + + HBasicBlock* entry = new (allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot); + HInstruction* constant_initial = new (allocator) HIntConstant(initial); + HInstruction* constant_1 = new (allocator) HIntConstant(1); + HInstruction* constant_10 = new (allocator) HIntConstant(10); + HInstruction* constant_minus_1 = new (allocator) HIntConstant(-1); + entry->AddInstruction(parameter); + entry->AddInstruction(constant_initial); + entry->AddInstruction(constant_1); + entry->AddInstruction(constant_10); + entry->AddInstruction(constant_minus_1); + + HBasicBlock* block = new (allocator) HBasicBlock(graph); + graph->AddBlock(block); + entry->AddSuccessor(block); + block->AddInstruction(new (allocator) HGoto()); + + HBasicBlock* loop_header = new (allocator) HBasicBlock(graph); + HBasicBlock* loop_body = new (allocator) HBasicBlock(graph); + HBasicBlock* exit = new (allocator) HBasicBlock(graph); + + graph->AddBlock(loop_header); + graph->AddBlock(loop_body); + graph->AddBlock(exit); + block->AddSuccessor(loop_header); + loop_header->AddSuccessor(exit); // true successor + loop_header->AddSuccessor(loop_body); // false successor + loop_body->AddSuccessor(loop_header); + + HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); + phi->AddInput(constant_initial); + 
HInstruction* null_check = new (allocator) HNullCheck(parameter, 0); + HInstruction* array_length = new (allocator) HArrayLength(null_check); + HInstruction* cmp = nullptr; + if (cond == kCondGE) { + cmp = new (allocator) HGreaterThanOrEqual(phi, array_length); + } else if (cond == kCondGT) { + cmp = new (allocator) HGreaterThan(phi, array_length); + } + HInstruction* if_inst = new (allocator) HIf(cmp); + loop_header->AddPhi(phi); + loop_header->AddInstruction(null_check); + loop_header->AddInstruction(array_length); + loop_header->AddInstruction(cmp); + loop_header->AddInstruction(if_inst); + + null_check = new (allocator) HNullCheck(parameter, 0); + array_length = new (allocator) HArrayLength(null_check); + HInstruction* sub = new (allocator) HSub(Primitive::kPrimInt, array_length, phi); + HInstruction* add_minus_1 = new (allocator) + HAdd(Primitive::kPrimInt, sub, constant_minus_1); + *bounds_check = new (allocator) HBoundsCheck(add_minus_1, array_length, 0); + HInstruction* array_set = new (allocator) HArraySet( + null_check, *bounds_check, constant_10, Primitive::kPrimInt, 0); + HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_1); + loop_body->AddInstruction(null_check); + loop_body->AddInstruction(array_length); + loop_body->AddInstruction(sub); + loop_body->AddInstruction(add_minus_1); + loop_body->AddInstruction(*bounds_check); + loop_body->AddInstruction(array_set); + loop_body->AddInstruction(add); + loop_body->AddInstruction(new (allocator) HGoto()); + phi->AddInput(add); + + exit->AddInstruction(new (allocator) HExit()); + + return graph; +} + +TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + // for (int i=0; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate with gvn. 
} + HInstruction* bounds_check = nullptr; + HGraph* graph = BuildSSAGraph4(&allocator, &bounds_check, 0); + graph->BuildDominatorTree(); + BoundsCheckElimination bounds_check_elimination(graph); + bounds_check_elimination.Run(); + ASSERT_FALSE(IsRemoved(bounds_check)); + + // This time add gvn. Need gvn to eliminate the second + // HArrayLength which uses the null check as its input. + graph = BuildSSAGraph4(&allocator, &bounds_check, 0); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_after_gvn(graph); + bounds_check_elimination_after_gvn.Run(); + ASSERT_TRUE(IsRemoved(bounds_check)); + + // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. } + graph = BuildSSAGraph4(&allocator, &bounds_check, 1); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); + bounds_check_elimination_with_initial_1.Run(); + ASSERT_TRUE(IsRemoved(bounds_check)); + + // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. } + graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT); + graph->BuildDominatorTree(); + GlobalValueNumberer(&allocator, graph).Run(); + BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); + bounds_check_elimination_with_greater_than.Run(); + ASSERT_FALSE(IsRemoved(bounds_check)); +} + +// Bubble sort: +// (Every array access bounds-check can be eliminated.) 
+// for (int i=0; i<array.length-1; i++) { +// for (int j=0; j<array.length-i-1; j++) { +// if (array[j] > array[j+1]) { +// int temp = array[j+1]; +// array[j+1] = array[j]; +// array[j] = temp; +// } +// } +// } +TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot); + HInstruction* constant_0 = new (&allocator) HIntConstant(0); + HInstruction* constant_minus_1 = new (&allocator) HIntConstant(-1); + HInstruction* constant_1 = new (&allocator) HIntConstant(1); + entry->AddInstruction(parameter); + entry->AddInstruction(constant_0); + entry->AddInstruction(constant_minus_1); + entry->AddInstruction(constant_1); + + HBasicBlock* block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block); + entry->AddSuccessor(block); + block->AddInstruction(new (&allocator) HGoto()); + + HBasicBlock* exit = new (&allocator) HBasicBlock(graph); + graph->AddBlock(exit); + exit->AddInstruction(new (&allocator) HExit()); + + HBasicBlock* outer_header = new (&allocator) HBasicBlock(graph); + graph->AddBlock(outer_header); + HPhi* phi_i = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt); + phi_i->AddInput(constant_0); + HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0); + HArrayLength* array_length = new (&allocator) HArrayLength(null_check); + HAdd* add = new (&allocator) HAdd(Primitive::kPrimInt, array_length, constant_minus_1); + HInstruction* cmp = new (&allocator) HGreaterThanOrEqual(phi_i, add); + HIf* if_inst = new (&allocator) HIf(cmp); + outer_header->AddPhi(phi_i); + outer_header->AddInstruction(null_check); + outer_header->AddInstruction(array_length); + outer_header->AddInstruction(add); + 
outer_header->AddInstruction(cmp); + outer_header->AddInstruction(if_inst); + + HBasicBlock* inner_header = new (&allocator) HBasicBlock(graph); + graph->AddBlock(inner_header); + HPhi* phi_j = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt); + phi_j->AddInput(constant_0); + null_check = new (&allocator) HNullCheck(parameter, 0); + array_length = new (&allocator) HArrayLength(null_check); + HSub* sub = new (&allocator) HSub(Primitive::kPrimInt, array_length, phi_i); + add = new (&allocator) HAdd(Primitive::kPrimInt, sub, constant_minus_1); + cmp = new (&allocator) HGreaterThanOrEqual(phi_j, add); + if_inst = new (&allocator) HIf(cmp); + inner_header->AddPhi(phi_j); + inner_header->AddInstruction(null_check); + inner_header->AddInstruction(array_length); + inner_header->AddInstruction(sub); + inner_header->AddInstruction(add); + inner_header->AddInstruction(cmp); + inner_header->AddInstruction(if_inst); + + HBasicBlock* inner_body_compare = new (&allocator) HBasicBlock(graph); + graph->AddBlock(inner_body_compare); + null_check = new (&allocator) HNullCheck(parameter, 0); + array_length = new (&allocator) HArrayLength(null_check); + HBoundsCheck* bounds_check1 = new (&allocator) HBoundsCheck(phi_j, array_length, 0); + HArrayGet* array_get_j = new (&allocator) + HArrayGet(null_check, bounds_check1, Primitive::kPrimInt); + inner_body_compare->AddInstruction(null_check); + inner_body_compare->AddInstruction(array_length); + inner_body_compare->AddInstruction(bounds_check1); + inner_body_compare->AddInstruction(array_get_j); + HInstruction* j_plus_1 = new (&allocator) HAdd(Primitive::kPrimInt, phi_j, constant_1); + null_check = new (&allocator) HNullCheck(parameter, 0); + array_length = new (&allocator) HArrayLength(null_check); + HBoundsCheck* bounds_check2 = new (&allocator) HBoundsCheck(j_plus_1, array_length, 0); + HArrayGet* array_get_j_plus_1 = new (&allocator) + HArrayGet(null_check, bounds_check2, Primitive::kPrimInt); + cmp = new (&allocator) 
HGreaterThanOrEqual(array_get_j, array_get_j_plus_1); + if_inst = new (&allocator) HIf(cmp); + inner_body_compare->AddInstruction(j_plus_1); + inner_body_compare->AddInstruction(null_check); + inner_body_compare->AddInstruction(array_length); + inner_body_compare->AddInstruction(bounds_check2); + inner_body_compare->AddInstruction(array_get_j_plus_1); + inner_body_compare->AddInstruction(cmp); + inner_body_compare->AddInstruction(if_inst); + + HBasicBlock* inner_body_swap = new (&allocator) HBasicBlock(graph); + graph->AddBlock(inner_body_swap); + j_plus_1 = new (&allocator) HAdd(Primitive::kPrimInt, phi_j, constant_1); + // temp = array[j+1] + null_check = new (&allocator) HNullCheck(parameter, 0); + array_length = new (&allocator) HArrayLength(null_check); + HInstruction* bounds_check3 = new (&allocator) HBoundsCheck(j_plus_1, array_length, 0); + array_get_j_plus_1 = new (&allocator) + HArrayGet(null_check, bounds_check3, Primitive::kPrimInt); + inner_body_swap->AddInstruction(j_plus_1); + inner_body_swap->AddInstruction(null_check); + inner_body_swap->AddInstruction(array_length); + inner_body_swap->AddInstruction(bounds_check3); + inner_body_swap->AddInstruction(array_get_j_plus_1); + // array[j+1] = array[j] + null_check = new (&allocator) HNullCheck(parameter, 0); + array_length = new (&allocator) HArrayLength(null_check); + HInstruction* bounds_check4 = new (&allocator) HBoundsCheck(phi_j, array_length, 0); + array_get_j = new (&allocator) + HArrayGet(null_check, bounds_check4, Primitive::kPrimInt); + inner_body_swap->AddInstruction(null_check); + inner_body_swap->AddInstruction(array_length); + inner_body_swap->AddInstruction(bounds_check4); + inner_body_swap->AddInstruction(array_get_j); + null_check = new (&allocator) HNullCheck(parameter, 0); + array_length = new (&allocator) HArrayLength(null_check); + HInstruction* bounds_check5 = new (&allocator) HBoundsCheck(j_plus_1, array_length, 0); + HArraySet* array_set_j_plus_1 = new (&allocator) + 
HArraySet(null_check, bounds_check5, array_get_j, Primitive::kPrimInt, 0); + inner_body_swap->AddInstruction(null_check); + inner_body_swap->AddInstruction(array_length); + inner_body_swap->AddInstruction(bounds_check5); + inner_body_swap->AddInstruction(array_set_j_plus_1); + // array[j] = temp + null_check = new (&allocator) HNullCheck(parameter, 0); + array_length = new (&allocator) HArrayLength(null_check); + HInstruction* bounds_check6 = new (&allocator) HBoundsCheck(phi_j, array_length, 0); + HArraySet* array_set_j = new (&allocator) + HArraySet(null_check, bounds_check6, array_get_j_plus_1, Primitive::kPrimInt, 0); + inner_body_swap->AddInstruction(null_check); + inner_body_swap->AddInstruction(array_length); + inner_body_swap->AddInstruction(bounds_check6); + inner_body_swap->AddInstruction(array_set_j); + inner_body_swap->AddInstruction(new (&allocator) HGoto()); + + HBasicBlock* inner_body_add = new (&allocator) HBasicBlock(graph); + graph->AddBlock(inner_body_add); + add = new (&allocator) HAdd(Primitive::kPrimInt, phi_j, constant_1); + inner_body_add->AddInstruction(add); + inner_body_add->AddInstruction(new (&allocator) HGoto()); + phi_j->AddInput(add); + + HBasicBlock* outer_body_add = new (&allocator) HBasicBlock(graph); + graph->AddBlock(outer_body_add); + add = new (&allocator) HAdd(Primitive::kPrimInt, phi_i, constant_1); + outer_body_add->AddInstruction(add); + outer_body_add->AddInstruction(new (&allocator) HGoto()); + phi_i->AddInput(add); + + block->AddSuccessor(outer_header); + outer_header->AddSuccessor(exit); + outer_header->AddSuccessor(inner_header); + inner_header->AddSuccessor(outer_body_add); + inner_header->AddSuccessor(inner_body_compare); + inner_body_compare->AddSuccessor(inner_body_add); + inner_body_compare->AddSuccessor(inner_body_swap); + inner_body_swap->AddSuccessor(inner_body_add); + inner_body_add->AddSuccessor(inner_header); + outer_body_add->AddSuccessor(outer_header); + + graph->BuildDominatorTree(); + 
GlobalValueNumberer(&allocator, graph).Run(); + // gvn should remove the same bounds check. + ASSERT_FALSE(IsRemoved(bounds_check1)); + ASSERT_FALSE(IsRemoved(bounds_check2)); + ASSERT_TRUE(IsRemoved(bounds_check3)); + ASSERT_TRUE(IsRemoved(bounds_check4)); + ASSERT_TRUE(IsRemoved(bounds_check5)); + ASSERT_TRUE(IsRemoved(bounds_check6)); + + BoundsCheckElimination bounds_check_elimination(graph); + bounds_check_elimination.Run(); + ASSERT_TRUE(IsRemoved(bounds_check1)); + ASSERT_TRUE(IsRemoved(bounds_check2)); + ASSERT_TRUE(IsRemoved(bounds_check3)); + ASSERT_TRUE(IsRemoved(bounds_check4)); + ASSERT_TRUE(IsRemoved(bounds_check5)); + ASSERT_TRUE(IsRemoved(bounds_check6)); +} + +} // namespace art diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 777a117a3b..0a3f830247 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -16,6 +16,7 @@ #include "builder.h" +#include "base/logging.h" #include "class_linker.h" #include "dex_file.h" #include "dex_file-inl.h" @@ -68,6 +69,74 @@ class Temporaries : public ValueObject { size_t index_; }; +class SwitchTable : public ValueObject { + public: + SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse) + : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) { + int32_t table_offset = instruction.VRegB_31t(); + const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset; + if (sparse) { + CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature)); + } else { + CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature)); + } + num_entries_ = table[1]; + values_ = reinterpret_cast<const int32_t*>(&table[2]); + } + + uint16_t GetNumEntries() const { + return num_entries_; + } + + void CheckIndex(size_t index) const { + if (sparse_) { + // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order. 
+ DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_)); + } else { + // In a packed table, we have the starting key and num_entries_ values. + DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_)); + } + } + + int32_t GetEntryAt(size_t index) const { + CheckIndex(index); + return values_[index]; + } + + uint32_t GetDexPcForIndex(size_t index) const { + CheckIndex(index); + return dex_pc_ + + (reinterpret_cast<const int16_t*>(values_ + index) - + reinterpret_cast<const int16_t*>(&instruction_)); + } + + // Index of the first value in the table. + size_t GetFirstValueIndex() const { + if (sparse_) { + // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order. + return num_entries_; + } else { + // In a packed table, we have the starting key and num_entries_ values. + return 1; + } + } + + private: + const Instruction& instruction_; + const uint32_t dex_pc_; + + // Whether this is a sparse-switch table (or a packed-switch one). + const bool sparse_; + + // This can't be const as it needs to be computed off of the given instruction, and complicated + // expressions in the initializer list seemed very ugly. 
+ uint16_t num_entries_; + + const int32_t* values_; + + DISALLOW_COPY_AND_ASSIGN(SwitchTable); +}; + void HGraphBuilder::InitializeLocals(uint16_t count) { graph_->SetNumberOfVRegs(count); locals_.SetSize(count); @@ -155,6 +224,37 @@ void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) { current_block_ = nullptr; } +static bool ShouldSkipCompilation(const CompilerDriver& compiler_driver, + const DexCompilationUnit& dex_compilation_unit, + size_t number_of_dex_instructions, + size_t number_of_blocks ATTRIBUTE_UNUSED, + size_t number_of_branches) { + const CompilerOptions& compiler_options = compiler_driver.GetCompilerOptions(); + CompilerOptions::CompilerFilter compiler_filter = compiler_options.GetCompilerFilter(); + if (compiler_filter == CompilerOptions::kEverything) { + return false; + } + + if (compiler_options.IsHugeMethod(number_of_dex_instructions)) { + LOG(INFO) << "Skip compilation of huge method " + << PrettyMethod(dex_compilation_unit.GetDexMethodIndex(), + *dex_compilation_unit.GetDexFile()) + << ": " << number_of_dex_instructions << " dex instructions"; + return true; + } + + // If it's large and contains no branches, it's likely to be machine generated initialization. 
+ if (compiler_options.IsLargeMethod(number_of_dex_instructions) && (number_of_branches == 0)) { + LOG(INFO) << "Skip compilation of large method with no branch " + << PrettyMethod(dex_compilation_unit.GetDexMethodIndex(), + *dex_compilation_unit.GetDexFile()) + << ": " << number_of_dex_instructions << " dex instructions"; + return true; + } + + return false; +} + HGraph* HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { const uint16_t* code_ptr = code_item.insns_; const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_; @@ -171,9 +271,27 @@ HGraph* HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { InitializeLocals(code_item.registers_size_); graph_->UpdateMaximumNumberOfOutVRegs(code_item.outs_size_); + // Compute the number of dex instructions, blocks, and branches. We will + // check these values against limits given to the compiler. + size_t number_of_dex_instructions = 0; + size_t number_of_blocks = 0; + size_t number_of_branches = 0; + // To avoid splitting blocks, we compute ahead of time the instructions that // start a new block, and create these blocks. - ComputeBranchTargets(code_ptr, code_end); + ComputeBranchTargets( + code_ptr, code_end, &number_of_dex_instructions, &number_of_blocks, &number_of_branches); + + // Note that the compiler driver is null when unit testing. + if (compiler_driver_ != nullptr) { + if (ShouldSkipCompilation(*compiler_driver_, + *dex_compilation_unit_, + number_of_dex_instructions, + number_of_blocks, + number_of_branches)) { + return nullptr; + } + } // Also create blocks for catch handlers. if (code_item.tries_size_ != 0) { @@ -232,8 +350,11 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) { current_block_ = block; } -void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end) { - // TODO: Support switch instructions. 
+void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, + const uint16_t* code_end, + size_t* number_of_dex_instructions, + size_t* number_of_blocks, + size_t* number_of_branches) { branch_targets_.SetSize(code_end - code_ptr); // Create the first block for the dex instructions, single successor of the entry block. @@ -243,21 +364,60 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_ // Iterate over all instructions and find branching instructions. Create blocks for // the locations these instructions branch to. - size_t dex_pc = 0; + uint32_t dex_pc = 0; while (code_ptr < code_end) { + (*number_of_dex_instructions)++; const Instruction& instruction = *Instruction::At(code_ptr); if (instruction.IsBranch()) { + (*number_of_branches)++; int32_t target = instruction.GetTargetOffset() + dex_pc; // Create a block for the target instruction. if (FindBlockStartingAt(target) == nullptr) { block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(target, block); + (*number_of_blocks)++; } dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); + (*number_of_blocks)++; + } + } else if (instruction.IsSwitch()) { + SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH); + + uint16_t num_entries = table.GetNumEntries(); + + // In a packed-switch, the entry at index 0 is the starting key. In a sparse-switch, the + // entry at index 0 is the first key, and values are after *all* keys. + size_t offset = table.GetFirstValueIndex(); + + // Use a larger loop counter type to avoid overflow issues. + for (size_t i = 0; i < num_entries; ++i) { + // The target of the case. 
+ uint32_t target = dex_pc + table.GetEntryAt(i + offset); + if (FindBlockStartingAt(target) == nullptr) { + block = new (arena_) HBasicBlock(graph_, target); + branch_targets_.Put(target, block); + (*number_of_blocks)++; + } + + // The next case gets its own block. + if (i < num_entries) { + block = new (arena_) HBasicBlock(graph_, target); + branch_targets_.Put(table.GetDexPcForIndex(i), block); + (*number_of_blocks)++; + } + } + + // Fall-through. Add a block if there is more code afterwards. + dex_pc += instruction.SizeInCodeUnits(); + code_ptr += instruction.SizeInCodeUnits(); + if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { + block = new (arena_) HBasicBlock(graph_, dex_pc); + branch_targets_.Put(dex_pc, block); + (*number_of_blocks)++; } } else { code_ptr += instruction.SizeInCodeUnits(); @@ -280,9 +440,10 @@ void HGraphBuilder::Unop_12x(const Instruction& instruction, Primitive::Type typ void HGraphBuilder::Conversion_12x(const Instruction& instruction, Primitive::Type input_type, - Primitive::Type result_type) { + Primitive::Type result_type, + uint32_t dex_pc) { HInstruction* first = LoadLocal(instruction.VRegB(), input_type); - current_block_->AddInstruction(new (arena_) HTypeConversion(result_type, first)); + current_block_->AddInstruction(new (arena_) HTypeConversion(result_type, first, dex_pc)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } @@ -423,38 +584,36 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, bool is_instance_call = invoke_type != kStatic; const size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 
0 : 1); - HInvoke* invoke = nullptr; - if (invoke_type == kVirtual || invoke_type == kInterface || invoke_type == kSuper) { - MethodReference target_method(dex_file_, method_idx); - uintptr_t direct_code; - uintptr_t direct_method; - int table_index; - InvokeType optimized_invoke_type = invoke_type; - compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc, true, true, - &optimized_invoke_type, &target_method, &table_index, - &direct_code, &direct_method); - if (table_index == -1) { - return false; - } + MethodReference target_method(dex_file_, method_idx); + uintptr_t direct_code; + uintptr_t direct_method; + int table_index; + InvokeType optimized_invoke_type = invoke_type; + + if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc, true, true, + &optimized_invoke_type, &target_method, &table_index, + &direct_code, &direct_method)) { + LOG(INFO) << "Did not compile " << PrettyMethod(method_idx, *dex_file_) + << " because a method call could not be resolved"; + return false; + } + DCHECK(optimized_invoke_type != kSuper); - if (optimized_invoke_type == kVirtual) { - invoke = new (arena_) HInvokeVirtual( - arena_, number_of_arguments, return_type, dex_pc, table_index); - } else if (optimized_invoke_type == kInterface) { - invoke = new (arena_) HInvokeInterface( - arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index); - } else if (optimized_invoke_type == kDirect) { - // For this compiler, sharpening only works if we compile PIC. - DCHECK(compiler_driver_->GetCompilerOptions().GetCompilePic()); - // Treat invoke-direct like static calls for now. 
- invoke = new (arena_) HInvokeStatic( - arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index); - } + HInvoke* invoke = nullptr; + if (optimized_invoke_type == kVirtual) { + invoke = new (arena_) HInvokeVirtual( + arena_, number_of_arguments, return_type, dex_pc, table_index); + } else if (optimized_invoke_type == kInterface) { + invoke = new (arena_) HInvokeInterface( + arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index); } else { - DCHECK(invoke_type == kDirect || invoke_type == kStatic); + DCHECK(optimized_invoke_type == kDirect || optimized_invoke_type == kStatic); + // Sharpening to kDirect only works if we compile PIC. + DCHECK((optimized_invoke_type == invoke_type) || (optimized_invoke_type != kDirect) + || compiler_driver_->GetCompilerOptions().GetCompilePic()); // Treat invoke-direct like static calls for now. invoke = new (arena_) HInvokeStatic( - arena_, number_of_arguments, return_type, dex_pc, method_idx); + arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index); } size_t start_index = 0; @@ -808,6 +967,85 @@ bool HGraphBuilder::BuildTypeCheck(const Instruction& instruction, return true; } +bool HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc) { + SwitchTable table(instruction, dex_pc, false); + + // Value to test against. + HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); + + uint16_t num_entries = table.GetNumEntries(); + // There should be at least one entry here. + DCHECK_GT(num_entries, 0U); + + // Chained cmp-and-branch, starting from starting_key. 
+ int32_t starting_key = table.GetEntryAt(0); + + for (size_t i = 1; i <= num_entries; i++) { + BuildSwitchCaseHelper(instruction, i, i == num_entries, table, value, starting_key + i - 1, + table.GetEntryAt(i), dex_pc); + } + return true; +} + +bool HGraphBuilder::BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc) { + SwitchTable table(instruction, dex_pc, true); + + // Value to test against. + HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); + + uint16_t num_entries = table.GetNumEntries(); + // There should be at least one entry here. + DCHECK_GT(num_entries, 0U); + + for (size_t i = 0; i < num_entries; i++) { + BuildSwitchCaseHelper(instruction, i, i == static_cast<size_t>(num_entries) - 1, table, value, + table.GetEntryAt(i), table.GetEntryAt(i + num_entries), dex_pc); + } + return true; +} + +void HGraphBuilder::BuildSwitchCaseHelper(const Instruction& instruction, size_t index, + bool is_last_case, const SwitchTable& table, + HInstruction* value, int32_t case_value_int, + int32_t target_offset, uint32_t dex_pc) { + PotentiallyAddSuspendCheck(target_offset, dex_pc); + + // The current case's value. + HInstruction* this_case_value = GetIntConstant(case_value_int); + + // Compare value and this_case_value. + HEqual* comparison = new (arena_) HEqual(value, this_case_value); + current_block_->AddInstruction(comparison); + HInstruction* ifinst = new (arena_) HIf(comparison); + current_block_->AddInstruction(ifinst); + + // Case hit: use the target offset to determine where to go. + HBasicBlock* case_target = FindBlockStartingAt(dex_pc + target_offset); + DCHECK(case_target != nullptr); + current_block_->AddSuccessor(case_target); + + // Case miss: go to the next case (or default fall-through). + // When there is a next case, we use the block stored with the table offset representing this + // case (that is where we registered them in ComputeBranchTargets). + // When there is no next case, we use the following instruction. 
+ // TODO: Find a good way to peel the last iteration to avoid conditional, but still have re-use. + if (!is_last_case) { + HBasicBlock* next_case_target = FindBlockStartingAt(table.GetDexPcForIndex(index)); + DCHECK(next_case_target != nullptr); + current_block_->AddSuccessor(next_case_target); + + // Need to manually add the block, as there is no dex-pc transition for the cases. + graph_->AddBlock(next_case_target); + + current_block_ = next_case_target; + } else { + HBasicBlock* default_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits()); + DCHECK(default_target != nullptr); + current_block_->AddSuccessor(default_target); + current_block_ = nullptr; + } +} + void HGraphBuilder::PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_pc) { if (target_offset <= 0) { // Unconditionnally add a suspend check to backward branches. We can remove @@ -1024,47 +1262,77 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 } case Instruction::INT_TO_LONG: { - Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong); + Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong, dex_pc); break; } case Instruction::INT_TO_FLOAT: { - Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat); + Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat, dex_pc); break; } case Instruction::INT_TO_DOUBLE: { - Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble); + Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble, dex_pc); break; } case Instruction::LONG_TO_INT: { - Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt); + Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt, dex_pc); break; } case Instruction::LONG_TO_FLOAT: { - Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat); + Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat, dex_pc); break; } case 
Instruction::LONG_TO_DOUBLE: { - Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble); + Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble, dex_pc); + break; + } + + case Instruction::FLOAT_TO_INT: { + Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt, dex_pc); + break; + } + + case Instruction::FLOAT_TO_LONG: { + Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimLong, dex_pc); + break; + } + + case Instruction::FLOAT_TO_DOUBLE: { + Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimDouble, dex_pc); + break; + } + + case Instruction::DOUBLE_TO_INT: { + Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimInt, dex_pc); + break; + } + + case Instruction::DOUBLE_TO_LONG: { + Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimLong, dex_pc); + break; + } + + case Instruction::DOUBLE_TO_FLOAT: { + Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimFloat, dex_pc); break; } case Instruction::INT_TO_BYTE: { - Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte); + Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte, dex_pc); break; } case Instruction::INT_TO_SHORT: { - Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort); + Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort, dex_pc); break; } case Instruction::INT_TO_CHAR: { - Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar); + Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar, dex_pc); break; } @@ -1167,6 +1435,16 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::REM_FLOAT: { + Binop_23x<HRem>(instruction, Primitive::kPrimFloat, dex_pc); + break; + } + + case Instruction::REM_DOUBLE: { + Binop_23x<HRem>(instruction, Primitive::kPrimDouble, dex_pc); + break; + } + case Instruction::AND_INT: { 
Binop_23x<HAnd>(instruction, Primitive::kPrimInt); break; @@ -1306,6 +1584,16 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::REM_FLOAT_2ADDR: { + Binop_12x<HRem>(instruction, Primitive::kPrimFloat, dex_pc); + break; + } + + case Instruction::REM_DOUBLE_2ADDR: { + Binop_12x<HRem>(instruction, Primitive::kPrimDouble, dex_pc); + break; + } + case Instruction::SHL_INT_2ADDR: { Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt); break; @@ -1700,6 +1988,20 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::PACKED_SWITCH: { + if (!BuildPackedSwitch(instruction, dex_pc)) { + return false; + } + break; + } + + case Instruction::SPARSE_SWITCH: { + if (!BuildSparseSwitch(instruction, dex_pc)) { + return false; + } + break; + } + default: return false; } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 25781b08f4..73c2f50958 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -29,6 +29,7 @@ namespace art { class Instruction; +class SwitchTable; class HGraphBuilder : public ValueObject { public: @@ -80,7 +81,13 @@ class HGraphBuilder : public ValueObject { // Finds all instructions that start a new block, and populates branch_targets_ with // the newly created blocks. - void ComputeBranchTargets(const uint16_t* start, const uint16_t* end); + // As a side effect, also compute the number of dex instructions, blocks, and + // branches. 
+ void ComputeBranchTargets(const uint16_t* start, + const uint16_t* end, + size_t* number_of_dex_instructions, + size_t* number_of_block, + size_t* number_of_branches); void MaybeUpdateCurrentBlock(size_t index); HBasicBlock* FindBlockStartingAt(int32_t index) const; @@ -129,7 +136,8 @@ class HGraphBuilder : public ValueObject { void Conversion_12x(const Instruction& instruction, Primitive::Type input_type, - Primitive::Type result_type); + Primitive::Type result_type, + uint32_t dex_pc); void BuildCheckedDivRem(uint16_t out_reg, uint16_t first_reg, @@ -196,6 +204,17 @@ class HGraphBuilder : public ValueObject { uint16_t type_index, uint32_t dex_pc); + // Builds an instruction sequence for a packed switch statement. + bool BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc); + + // Builds an instruction sequence for a sparse switch statement. + bool BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc); + + void BuildSwitchCaseHelper(const Instruction& instruction, size_t index, + bool is_last_case, const SwitchTable& table, + HInstruction* value, int32_t case_value_int, + int32_t target_offset, uint32_t dex_pc); + ArenaAllocator* const arena_; // A list of the size of the dex code holding block information for diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index e581af22aa..461409ddca 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -499,6 +499,29 @@ void CodeGenerator::BuildStackMaps(std::vector<uint8_t>* data) { } void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { + if (instruction != nullptr) { + // The code generated for some type conversions may call the + // runtime, thus normally requiring a subsequent call to this + // method. However, the method verifier does not produce PC + // information for certain instructions, which are considered "atomic" + // (they cannot join a GC). 
+ // Therefore we do not currently record PC information for such + // instructions. As this may change later, we added this special + // case so that code generators may nevertheless call + // CodeGenerator::RecordPcInfo without triggering an error in + // CodeGenerator::BuildNativeGCMap ("Missing ref for dex pc 0x") + // thereafter. + if (instruction->IsTypeConversion()) { + return; + } + if (instruction->IsRem()) { + Primitive::Type type = instruction->AsRem()->GetResultType(); + if ((type == Primitive::kPrimFloat) || (type == Primitive::kPrimDouble)) { + return; + } + } + } + // Collect PC infos for the mapping table. struct PcInfo pc_info; pc_info.dex_pc = dex_pc; diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 4c0d3ea960..1d42c47d56 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -35,6 +35,11 @@ static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); // Binary encoding of 2^31 for type double. static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000); +// Maximum value for a primitive integer. +static int32_t constexpr kPrimIntMax = 0x7fffffff; +// Maximum value for a primitive long. 
+static int64_t constexpr kPrimLongMax = 0x7fffffffffffffff; + class Assembler; class CodeGenerator; class DexCompilationUnit; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 5b2be2e9a1..cbe5f0cc6e 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -44,8 +44,9 @@ static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 }; static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); -static constexpr SRegister kRuntimeParameterFpuRegisters[] = { }; -static constexpr size_t kRuntimeParameterFpuRegistersLength = 0; +static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0, S1, S2, S3 }; +static constexpr size_t kRuntimeParameterFpuRegistersLength = + arraysize(kRuntimeParameterFpuRegisters); class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> { public: @@ -874,6 +875,7 @@ void CodeGeneratorARM::InvokeRuntime(int32_t entry_point_offset, || instruction->IsBoundsCheck() || instruction->IsNullCheck() || instruction->IsDivZeroCheck() + || instruction->GetLocations()->CanCall() || !IsLeafMethod()); } @@ -1359,11 +1361,20 @@ void InstructionCodeGeneratorARM::VisitNeg(HNeg* neg) { } void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall); Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); DCHECK_NE(result_type, input_type); + + // The float-to-long and double-to-long type conversions rely on a + // call to the runtime. + LocationSummary::CallKind call_kind = + ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble) + && result_type == Primitive::kPrimLong) + ? 
LocationSummary::kCall + : LocationSummary::kNoCall; + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind); + switch (result_type) { case Primitive::kPrimByte: switch (input_type) { @@ -1406,9 +1417,17 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimFloat: + // Processing a Dex `float-to-int' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `double-to-int' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); break; default: @@ -1428,11 +1447,24 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type << " to " - << result_type << " not yet implemented"; + case Primitive::kPrimFloat: { + // Processing a Dex `float-to-long' instruction. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation( + calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::RegisterPairLocation(R0, R1)); break; + } + + case Primitive::kPrimDouble: { + // Processing a Dex `double-to-long' instruction. 
+ InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterPairLocation( + calling_convention.GetFpuRegisterAt(0), + calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(Location::RegisterPairLocation(R0, R1)); + break; + } default: LOG(FATAL) << "Unexpected type conversion from " << input_type @@ -1478,8 +1510,9 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `double-to-float' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: @@ -1509,8 +1542,9 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimFloat: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `float-to-double' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: @@ -1580,11 +1614,24 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio } break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + case Primitive::kPrimFloat: { + // Processing a Dex `float-to-int' instruction. + SRegister temp = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>(); + __ vmovs(temp, in.AsFpuRegister<SRegister>()); + __ vcvtis(temp, temp); + __ vmovrs(out.AsRegister<Register>(), temp); break; + } + + case Primitive::kPrimDouble: { + // Processing a Dex `double-to-int' instruction. 
+ SRegister temp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>(); + DRegister temp_d = FromLowSToD(temp_s); + __ vmovd(temp_d, FromLowSToD(in.AsFpuRegisterPairLow<SRegister>())); + __ vcvtid(temp_s, temp_d); + __ vmovrs(out.AsRegister<Register>(), temp_s); + break; + } default: LOG(FATAL) << "Unexpected type conversion from " << input_type @@ -1609,9 +1656,17 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio break; case Primitive::kPrimFloat: + // Processing a Dex `float-to-long' instruction. + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pF2l), + conversion, + conversion->GetDexPc()); + break; + case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type << " to " - << result_type << " not yet implemented"; + // Processing a Dex `double-to-long' instruction. + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pD2l), + conversion, + conversion->GetDexPc()); break; default: @@ -1690,8 +1745,9 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio } case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `double-to-float' instruction. + __ vcvtsd(out.AsFpuRegister<SRegister>(), + FromLowSToD(in.AsFpuRegisterPairLow<SRegister>())); break; default: @@ -1746,8 +1802,9 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio } case Primitive::kPrimFloat: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `float-to-double' instruction. + __ vcvtds(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), + in.AsFpuRegister<SRegister>()); break; default: @@ -2075,12 +2132,13 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { } void LocationsBuilderARM::VisitRem(HRem* rem) { - LocationSummary::CallKind call_kind = rem->GetResultType() == Primitive::kPrimLong - ? 
LocationSummary::kCall - : LocationSummary::kNoCall; + Primitive::Type type = rem->GetResultType(); + LocationSummary::CallKind call_kind = type == Primitive::kPrimInt + ? LocationSummary::kNoCall + : LocationSummary::kCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); - switch (rem->GetResultType()) { + switch (type) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -2098,14 +2156,26 @@ void LocationsBuilderARM::VisitRem(HRem* rem) { locations->SetOut(Location::RegisterPairLocation(R2, R3)); break; } - case Primitive::kPrimFloat: + case Primitive::kPrimFloat: { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(Location::FpuRegisterLocation(S0)); + break; + } + case Primitive::kPrimDouble: { - LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType(); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterPairLocation( + calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1))); + locations->SetInAt(1, Location::FpuRegisterPairLocation( + calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3))); + locations->SetOut(Location::FpuRegisterPairLocation(S0, S1)); + break; + } default: - LOG(FATAL) << "Unexpected rem type " << rem->GetResultType(); + LOG(FATAL) << "Unexpected rem type " << type; } } @@ -2115,7 +2185,8 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) { Location first = locations->InAt(0); Location second = locations->InAt(1); - switch (rem->GetResultType()) { + Primitive::Type type = rem->GetResultType(); + switch (type) { case Primitive::kPrimInt: { Register reg1 = first.AsRegister<Register>(); Register 
reg2 = second.AsRegister<Register>(); @@ -2131,26 +2202,22 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) { } case Primitive::kPrimLong: { - InvokeRuntimeCallingConvention calling_convention; - DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>()); - DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>()); - DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>()); - DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>()); - DCHECK_EQ(R2, out.AsRegisterPairLow<Register>()); - DCHECK_EQ(R3, out.AsRegisterPairHigh<Register>()); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc()); break; } - case Primitive::kPrimFloat: + case Primitive::kPrimFloat: { + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc()); + break; + } + case Primitive::kPrimDouble: { - LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType(); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc()); break; } default: - LOG(FATAL) << "Unexpected rem type " << rem->GetResultType(); + LOG(FATAL) << "Unexpected rem type " << type; } } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index fd794f95d1..8a0c2deab9 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1326,11 +1326,20 @@ void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) { } void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall); Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); DCHECK_NE(result_type, input_type); + + // The float-to-long and double-to-long type conversions rely on a + // call to the runtime. 
+ LocationSummary::CallKind call_kind = + ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble) + && result_type == Primitive::kPrimLong) + ? LocationSummary::kCall + : LocationSummary::kNoCall; + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind); + switch (result_type) { case Primitive::kPrimByte: switch (input_type) { @@ -1373,9 +1382,17 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimFloat: + // Processing a Dex `float-to-int' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `double-to-int' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); break; default: @@ -1395,10 +1412,30 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type << " to " - << result_type << " not yet implemented"; + case Primitive::kPrimFloat: { + // Processing a Dex `float-to-long' instruction. + InvokeRuntimeCallingConvention calling_convention; + // Note that on x86 floating-point parameters are passed + // through core registers (here, EAX). + locations->SetInAt(0, Location::RegisterLocation( + calling_convention.GetRegisterAt(0))); + // The runtime helper puts the result in EAX, EDX. 
+ locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); + break; + } + + case Primitive::kPrimDouble: { + // Processing a Dex `double-to-long' instruction. + InvokeRuntimeCallingConvention calling_convention; + // Note that on x86 floating-point parameters are passed + // through core registers (here, EAX and ECX). + locations->SetInAt(0, Location::RegisterPairLocation( + calling_convention.GetRegisterAt(0), + calling_convention.GetRegisterAt(1))); + // The runtime helper puts the result in EAX, EDX. + locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); + break; + } break; default: @@ -1443,8 +1480,9 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `double-to-float' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: @@ -1473,8 +1511,9 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimFloat: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `float-to-double' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: @@ -1559,11 +1598,55 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio } break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + case Primitive::kPrimFloat: { + // Processing a Dex `float-to-int' instruction. 
+ XmmRegister input = in.AsFpuRegister<XmmRegister>(); + Register output = out.AsRegister<Register>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + Label done, nan; + + __ movl(output, Immediate(kPrimIntMax)); + // temp = int-to-float(output) + __ cvtsi2ss(temp, output); + // if input >= temp goto done + __ comiss(input, temp); + __ j(kAboveEqual, &done); + // if input == NaN goto nan + __ j(kUnordered, &nan); + // output = float-to-int-truncate(input) + __ cvttss2si(output, input); + __ jmp(&done); + __ Bind(&nan); + // output = 0 + __ xorl(output, output); + __ Bind(&done); break; + } + + case Primitive::kPrimDouble: { + // Processing a Dex `double-to-int' instruction. + XmmRegister input = in.AsFpuRegister<XmmRegister>(); + Register output = out.AsRegister<Register>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + Label done, nan; + + __ movl(output, Immediate(kPrimIntMax)); + // temp = int-to-double(output) + __ cvtsi2sd(temp, output); + // if input >= temp goto done + __ comisd(input, temp); + __ j(kAboveEqual, &done); + // if input == NaN goto nan + __ j(kUnordered, &nan); + // output = double-to-int-truncate(input) + __ cvttsd2si(output, input); + __ jmp(&done); + __ Bind(&nan); + // output = 0 + __ xorl(output, output); + __ Bind(&done); + break; + } default: LOG(FATAL) << "Unexpected type conversion from " << input_type @@ -1585,9 +1668,15 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio break; case Primitive::kPrimFloat: + // Processing a Dex `float-to-long' instruction. + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pF2l))); + codegen_->RecordPcInfo(conversion, conversion->GetDexPc()); + break; + case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type << " to " - << result_type << " not yet implemented"; + // Processing a Dex `double-to-long' instruction. 
+ __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pD2l))); + codegen_->RecordPcInfo(conversion, conversion->GetDexPc()); break; default: @@ -1664,8 +1753,8 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio } case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `double-to-float' instruction. + __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); break; default: @@ -1711,8 +1800,8 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio } case Primitive::kPrimFloat: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `float-to-double' instruction. + __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); break; default: @@ -2117,12 +2206,13 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { } void LocationsBuilderX86::VisitRem(HRem* rem) { - LocationSummary::CallKind call_kind = rem->GetResultType() == Primitive::kPrimLong - ? LocationSummary::kCall - : LocationSummary::kNoCall; + Primitive::Type type = rem->GetResultType(); + LocationSummary::CallKind call_kind = type == Primitive::kPrimInt + ? 
LocationSummary::kNoCall + : LocationSummary::kCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); - switch (rem->GetResultType()) { + switch (type) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RegisterLocation(EAX)); locations->SetInAt(1, Location::RequiresRegister()); @@ -2139,14 +2229,29 @@ void LocationsBuilderX86::VisitRem(HRem* rem) { locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); break; } - case Primitive::kPrimFloat: + case Primitive::kPrimFloat: { + InvokeRuntimeCallingConvention calling_convention; + // x86 floating-point parameters are passed through core registers (EAX, ECX). + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + // The runtime helper puts the result in XMM0. + locations->SetOut(Location::FpuRegisterLocation(XMM0)); + break; + } case Primitive::kPrimDouble: { - LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType(); + InvokeRuntimeCallingConvention calling_convention; + // x86 floating-point parameters are passed through core registers (EAX_ECX, EDX_EBX). + locations->SetInAt(0, Location::RegisterPairLocation( + calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, Location::RegisterPairLocation( + calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); + // The runtime helper puts the result in XMM0. 
+ locations->SetOut(Location::FpuRegisterLocation(XMM0)); break; } default: - LOG(FATAL) << "Unexpected rem type " << rem->GetResultType(); + LOG(FATAL) << "Unexpected rem type " << type; } } @@ -2158,9 +2263,14 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { GenerateDivRemIntegral(rem); break; } - case Primitive::kPrimFloat: + case Primitive::kPrimFloat: { + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmodf))); + codegen_->RecordPcInfo(rem, rem->GetDexPc()); + break; + } case Primitive::kPrimDouble: { - LOG(FATAL) << "Unimplemented rem type " << type; + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmod))); + codegen_->RecordPcInfo(rem, rem->GetDexPc()); break; } default: diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 4d70efcf38..233f4a4e4b 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -43,8 +43,9 @@ static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX }; static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); -static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { }; -static constexpr size_t kRuntimeParameterFpuRegistersLength = 0; +static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 }; +static constexpr size_t kRuntimeParameterFpuRegistersLength = + arraysize(kRuntimeParameterFpuRegisters); class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> { public: @@ -1363,9 +1364,17 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimFloat: + // Processing a Dex `float-to-int' instruction. 
+ locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `double-to-int' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); break; default: @@ -1388,9 +1397,17 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimFloat: + // Processing a Dex `float-to-long' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type << " to " - << result_type << " not yet implemented"; + // Processing a Dex `double-to-long' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); break; default: @@ -1433,8 +1450,9 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `double-to-float' instruction. 
+ locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: @@ -1461,8 +1479,9 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimFloat: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `float-to-double' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: @@ -1550,11 +1569,55 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + case Primitive::kPrimFloat: { + // Processing a Dex `float-to-int' instruction. + XmmRegister input = in.AsFpuRegister<XmmRegister>(); + CpuRegister output = out.AsRegister<CpuRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + Label done, nan; + + __ movl(output, Immediate(kPrimIntMax)); + // temp = int-to-float(output) + __ cvtsi2ss(temp, output, false); + // if input >= temp goto done + __ comiss(input, temp); + __ j(kAboveEqual, &done); + // if input == NaN goto nan + __ j(kUnordered, &nan); + // output = float-to-int-truncate(input) + __ cvttss2si(output, input, false); + __ jmp(&done); + __ Bind(&nan); + // output = 0 + __ xorl(output, output); + __ Bind(&done); break; + } + + case Primitive::kPrimDouble: { + // Processing a Dex `double-to-int' instruction. 
+ XmmRegister input = in.AsFpuRegister<XmmRegister>(); + CpuRegister output = out.AsRegister<CpuRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + Label done, nan; + + __ movl(output, Immediate(kPrimIntMax)); + // temp = int-to-double(output) + __ cvtsi2sd(temp, output); + // if input >= temp goto done + __ comisd(input, temp); + __ j(kAboveEqual, &done); + // if input == NaN goto nan + __ j(kUnordered, &nan); + // output = double-to-int-truncate(input) + __ cvttsd2si(output, input); + __ jmp(&done); + __ Bind(&nan); + // output = 0 + __ xorl(output, output); + __ Bind(&done); + break; + } default: LOG(FATAL) << "Unexpected type conversion from " << input_type @@ -1574,11 +1637,55 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type << " to " - << result_type << " not yet implemented"; + case Primitive::kPrimFloat: { + // Processing a Dex `float-to-long' instruction. + XmmRegister input = in.AsFpuRegister<XmmRegister>(); + CpuRegister output = out.AsRegister<CpuRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + Label done, nan; + + __ movq(output, Immediate(kPrimLongMax)); + // temp = long-to-float(output) + __ cvtsi2ss(temp, output, true); + // if input >= temp goto done + __ comiss(input, temp); + __ j(kAboveEqual, &done); + // if input == NaN goto nan + __ j(kUnordered, &nan); + // output = float-to-long-truncate(input) + __ cvttss2si(output, input, true); + __ jmp(&done); + __ Bind(&nan); + // output = 0 + __ xorq(output, output); + __ Bind(&done); + break; + } + + case Primitive::kPrimDouble: { + // Processing a Dex `double-to-long' instruction. 
+ XmmRegister input = in.AsFpuRegister<XmmRegister>(); + CpuRegister output = out.AsRegister<CpuRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + Label done, nan; + + __ movq(output, Immediate(kPrimLongMax)); + // temp = long-to-double(output) + __ cvtsi2sd(temp, output, true); + // if input >= temp goto done + __ comisd(input, temp); + __ j(kAboveEqual, &done); + // if input == NaN goto nan + __ j(kUnordered, &nan); + // output = double-to-long-truncate(input) + __ cvttsd2si(output, input, true); + __ jmp(&done); + __ Bind(&nan); + // output = 0 + __ xorq(output, output); + __ Bind(&done); break; + } default: LOG(FATAL) << "Unexpected type conversion from " << input_type @@ -1626,8 +1733,8 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver break; case Primitive::kPrimDouble: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `double-to-float' instruction. + __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); break; default: @@ -1652,8 +1759,8 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver break; case Primitive::kPrimFloat: - LOG(FATAL) << "Type conversion from " << input_type - << " to " << result_type << " not yet implemented"; + // Processing a Dex `float-to-double' instruction. + __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); break; default: @@ -1893,16 +2000,16 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in // 0x80000000(00000000)/-1 triggers an arithmetic exception! // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000) // so it's safe to just use negl instead of more complex comparisons. 
- - __ cmpl(second_reg, Immediate(-1)); - __ j(kEqual, slow_path->GetEntryLabel()); - if (type == Primitive::kPrimInt) { + __ cmpl(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); // edx:eax <- sign-extended of eax __ cdq(); // eax = quotient, edx = remainder __ idivl(second_reg); } else { + __ cmpq(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); // rdx:rax <- sign-extended of rax __ cqo(); // rax = quotient, rdx = remainder @@ -1969,9 +2076,14 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { } void LocationsBuilderX86_64::VisitRem(HRem* rem) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall); - switch (rem->GetResultType()) { + Primitive::Type type = rem->GetResultType(); + LocationSummary::CallKind call_kind = + (type == Primitive::kPrimInt) || (type == Primitive::kPrimLong) + ? LocationSummary::kNoCall + : LocationSummary::kCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + + switch (type) { case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RegisterLocation(RAX)); @@ -1983,12 +2095,16 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType(); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); + // The runtime helper puts the result in XMM0. 
+ locations->SetOut(Location::FpuRegisterLocation(XMM0)); break; } default: - LOG(FATAL) << "Unexpected rem type " << rem->GetResultType(); + LOG(FATAL) << "Unexpected rem type " << type; } } @@ -2000,13 +2116,16 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { GenerateDivRemIntegral(rem); break; } - - case Primitive::kPrimFloat: + case Primitive::kPrimFloat: { + __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmodf), true)); + codegen_->RecordPcInfo(rem, rem->GetDexPc()); + break; + } case Primitive::kPrimDouble: { - LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType(); + __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmod), true)); + codegen_->RecordPcInfo(rem, rem->GetDexPc()); break; } - default: LOG(FATAL) << "Unexpected rem type " << rem->GetResultType(); } diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index c36b1436d3..82fe03caa2 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -32,7 +32,7 @@ static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) { const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); HGraph* graph = builder.BuildGraph(*item); graph->BuildDominatorTree(); - graph->FindNaturalLoops(); + graph->AnalyzeNaturalLoops(); return graph; } diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index ad6e3382bc..a6a68ca59d 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -175,7 +175,7 @@ TEST(GVNTest, LoopFieldElimination) { graph->BuildDominatorTree(); graph->TransformToSSA(); - graph->FindNaturalLoops(); + graph->AnalyzeNaturalLoops(); GlobalValueNumberer(&allocator, graph).Run(); // Check that all field get instructions are still there. 
@@ -239,7 +239,7 @@ TEST(GVNTest, LoopSideEffects) { graph->BuildDominatorTree(); graph->TransformToSSA(); - graph->FindNaturalLoops(); + graph->AnalyzeNaturalLoops(); ASSERT_TRUE(inner_loop_header->GetLoopInformation()->IsIn( *outer_loop_header->GetLoopInformation())); diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index c49cf7e03f..28ca5e81e6 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -44,7 +44,7 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num graph->BuildDominatorTree(); graph->TransformToSSA(); - graph->FindNaturalLoops(); + graph->AnalyzeNaturalLoops(); x86::CodeGeneratorX86 codegen(graph); SsaLivenessAnalysis liveness(*graph, &codegen); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index e3c6fec23b..5c7e6f0325 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -38,7 +38,7 @@ static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) { RemoveSuspendChecks(graph); graph->BuildDominatorTree(); graph->TransformToSSA(); - graph->FindNaturalLoops(); + graph->AnalyzeNaturalLoops(); // `Inline` conditions into ifs. PrepareForRegisterAllocation(graph).Run(); return graph; diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 246e7ef309..4b69e57960 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -50,7 +50,7 @@ static void TestCode(const uint16_t* data, const char* expected) { ASSERT_NE(graph, nullptr); graph->BuildDominatorTree(); graph->TransformToSSA(); - graph->FindNaturalLoops(); + graph->AnalyzeNaturalLoops(); // `Inline` conditions into ifs. 
PrepareForRegisterAllocation(graph).Run(); x86::CodeGeneratorX86 codegen(graph); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 8cb2ef6de8..ba4dccf598 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -30,6 +30,36 @@ void HGraph::FindBackEdges(ArenaBitVector* visited) { VisitBlockForBackEdges(entry_block_, visited, &visiting); } +static void RemoveAsUser(HInstruction* instruction) { + for (size_t i = 0; i < instruction->InputCount(); i++) { + instruction->InputAt(i)->RemoveUser(instruction, i); + } + + HEnvironment* environment = instruction->GetEnvironment(); + if (environment != nullptr) { + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + HInstruction* vreg = environment->GetInstructionAt(i); + if (vreg != nullptr) { + vreg->RemoveEnvironmentUser(environment, i); + } + } + } +} + +void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const { + for (size_t i = 0; i < blocks_.Size(); ++i) { + if (!visited.IsBitSet(i)) { + HBasicBlock* block = blocks_.Get(i); + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + RemoveAsUser(it.Current()); + } + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + RemoveAsUser(it.Current()); + } + } + } +} + void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const { for (size_t i = 0; i < blocks_.Size(); ++i) { if (!visited.IsBitSet(i)) { @@ -72,16 +102,21 @@ void HGraph::BuildDominatorTree() { // (1) Find the back edges in the graph doing a DFS traversal. FindBackEdges(&visited); - // (2) Remove blocks not visited during the initial DFS. - // Step (3) requires dead blocks to be removed from the + // (2) Remove instructions and phis from blocks not visited during + // the initial DFS as users from other instructions, so that + // users can be safely removed before uses later. 
+ RemoveInstructionsAsUsersFromDeadBlocks(visited); + + // (3) Remove blocks not visited during the initial DFS. + // Step (4) requires dead blocks to be removed from the // predecessors list of live blocks. RemoveDeadBlocks(visited); - // (3) Simplify the CFG now, so that we don't need to recompute + // (4) Simplify the CFG now, so that we don't need to recompute // dominators and the reverse post order. SimplifyCFG(); - // (4) Compute the immediate dominator of each block. We visit + // (5) Compute the immediate dominator of each block. We visit // the successors of a block only when all its forward branches // have been processed. GrowableArray<size_t> visits(arena_, blocks_.Size()); @@ -237,7 +272,7 @@ void HGraph::SimplifyCFG() { } } -bool HGraph::FindNaturalLoops() const { +bool HGraph::AnalyzeNaturalLoops() const { for (size_t i = 0; i < blocks_.Size(); ++i) { HBasicBlock* block = blocks_.Get(i); if (block->IsLoopHeader()) { @@ -391,19 +426,7 @@ static void Remove(HInstructionList* instruction_list, instruction->SetBlock(nullptr); instruction_list->RemoveInstruction(instruction); - for (size_t i = 0; i < instruction->InputCount(); i++) { - instruction->InputAt(i)->RemoveUser(instruction, i); - } - - HEnvironment* environment = instruction->GetEnvironment(); - if (environment != nullptr) { - for (size_t i = 0, e = environment->Size(); i < e; ++i) { - HInstruction* vreg = environment->GetInstructionAt(i); - if (vreg != nullptr) { - vreg->RemoveEnvironmentUser(environment, i); - } - } - } + RemoveAsUser(instruction); } void HBasicBlock::RemoveInstruction(HInstruction* instruction) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 28496e4ad2..8a25de19d9 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -112,10 +112,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { void TransformToSSA(); void SimplifyCFG(); - // Find all natural loops in this graph. 
Aborts computation and returns false - // if one loop is not natural, that is the header does not dominate the back - // edge. - bool FindNaturalLoops() const; + // Analyze all natural loops in this graph. Returns false if one + // loop is not natural, that is the header does not dominate the + // back edge. + bool AnalyzeNaturalLoops() const; void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor); void SimplifyLoop(HBasicBlock* header); @@ -173,6 +173,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { void VisitBlockForBackEdges(HBasicBlock* block, ArenaBitVector* visited, ArenaBitVector* visiting); + void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited) const; ArenaAllocator* const arena_; @@ -1669,7 +1670,12 @@ class HNewInstance : public HExpression<0> { uint16_t GetTypeIndex() const { return type_index_; } // Calls runtime so needs an environment. - virtual bool NeedsEnvironment() const { return true; } + bool NeedsEnvironment() const OVERRIDE { return true; } + // It may throw when called on: + // - interfaces + // - abstract/innaccessible/unknown classes + // TODO: optimize when possible. + bool CanThrow() const OVERRIDE { return true; } DECLARE_INSTRUCTION(NewInstance); @@ -1995,8 +2001,8 @@ class HNot : public HUnaryOperation { class HTypeConversion : public HExpression<1> { public: // Instantiate a type conversion of `input` to `result_type`. 
- HTypeConversion(Primitive::Type result_type, HInstruction* input) - : HExpression(result_type, SideEffects::None()) { + HTypeConversion(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc) + : HExpression(result_type, SideEffects::None()), dex_pc_(dex_pc) { SetRawInputAt(0, input); DCHECK_NE(input->GetType(), result_type); } @@ -2005,12 +2011,18 @@ class HTypeConversion : public HExpression<1> { Primitive::Type GetInputType() const { return GetInput()->GetType(); } Primitive::Type GetResultType() const { return GetType(); } + // Required by the x86 and ARM code generators when producing calls + // to the runtime. + uint32_t GetDexPc() const { return dex_pc_; } + bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; } DECLARE_INSTRUCTION(TypeConversion); private: + const uint32_t dex_pc_; + DISALLOW_COPY_AND_ASSIGN(HTypeConversion); }; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index d8533eb8bf..11fc9bf9b9 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -19,6 +19,7 @@ #include <fstream> #include <stdint.h> +#include "bounds_check_elimination.h" #include "builder.h" #include "code_generator.h" #include "compiler.h" @@ -192,24 +193,24 @@ static bool CanOptimize(const DexFile::CodeItem& code_item) { } static void RunOptimizations(HGraph* graph, const HGraphVisualizer& visualizer) { - TransformToSsa ssa(graph); HDeadCodeElimination opt1(graph); HConstantFolding opt2(graph); SsaRedundantPhiElimination opt3(graph); SsaDeadPhiElimination opt4(graph); InstructionSimplifier opt5(graph); GVNOptimization opt6(graph); - InstructionSimplifier opt7(graph); + BoundsCheckElimination bce(graph); + InstructionSimplifier opt8(graph); HOptimization* optimizations[] = { - &ssa, &opt1, &opt2, &opt3, &opt4, &opt5, &opt6, - &opt7 + &bce, + &opt8 }; for (size_t i = 0; i 
< arraysize(optimizations); ++i) { @@ -220,6 +221,23 @@ static void RunOptimizations(HGraph* graph, const HGraphVisualizer& visualizer) } } +static bool TryBuildingSsa(HGraph* graph, + const DexCompilationUnit& dex_compilation_unit, + const HGraphVisualizer& visualizer) { + graph->BuildDominatorTree(); + graph->TransformToSSA(); + + if (!graph->AnalyzeNaturalLoops()) { + LOG(INFO) << "Skipping compilation of " + << PrettyMethod(dex_compilation_unit.GetDexMethodIndex(), + *dex_compilation_unit.GetDexFile()) + << ": it contains a non natural loop"; + return false; + } + visualizer.DumpGraph("ssa transform"); + return true; +} + CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, uint32_t access_flags, InvokeType invoke_type, @@ -281,7 +299,12 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, if (run_optimizations_ && CanOptimize(*code_item) && RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set)) { + VLOG(compiler) << "Optimizing " << PrettyMethod(method_idx, dex_file); optimized_compiled_methods_++; + if (!TryBuildingSsa(graph, dex_compilation_unit, visualizer)) { + // We could not transform the graph to SSA, bailout. + return nullptr; + } RunOptimizations(graph, visualizer); PrepareForRegisterAllocation(graph).Run(); @@ -316,23 +339,10 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, LOG(FATAL) << "Could not allocate registers in optimizing compiler"; UNREACHABLE(); } else { + VLOG(compiler) << "Compile baseline " << PrettyMethod(method_idx, dex_file); unoptimized_compiled_methods_++; codegen->CompileBaseline(&allocator); - if (CanOptimize(*code_item)) { - // Run these phases to get some test coverage. 
- graph->BuildDominatorTree(); - graph->TransformToSSA(); - visualizer.DumpGraph("ssa"); - graph->FindNaturalLoops(); - SsaRedundantPhiElimination(graph).Run(); - SsaDeadPhiElimination(graph).Run(); - GVNOptimization(graph).Run(); - SsaLivenessAnalysis liveness(*graph, codegen); - liveness.Analyze(); - visualizer.DumpGraph(kLivenessPassName); - } - std::vector<uint8_t> mapping_table; SrcMap src_mapping_table; codegen->BuildMappingTable(&mapping_table, diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index c4106b72b5..04b56345c4 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -96,6 +96,11 @@ inline std::string Patch(const std::string& original, const diff_t& diff) { return result; } +// Returns if the instruction is removed from the graph. +inline bool IsRemoved(HInstruction* instruction) { + return instruction->GetBlock() == nullptr; +} + } // namespace art #endif // ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_ diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index ba4be34ca3..8d75db91d2 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -41,7 +41,7 @@ static bool Check(const uint16_t* data) { HGraph* graph = builder.BuildGraph(*item); graph->BuildDominatorTree(); graph->TransformToSSA(); - graph->FindNaturalLoops(); + graph->AnalyzeNaturalLoops(); x86::CodeGeneratorX86 codegen(graph); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -255,7 +255,7 @@ static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) { HGraph* graph = builder.BuildGraph(*item); graph->BuildDominatorTree(); graph->TransformToSSA(); - graph->FindNaturalLoops(); + graph->AnalyzeNaturalLoops(); return graph; } @@ -494,7 +494,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, (*phi)->AddInput(*input2); 
graph->BuildDominatorTree(); - graph->FindNaturalLoops(); + graph->AnalyzeNaturalLoops(); return graph; } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 5ab328fe23..2cbd51aa10 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -22,20 +22,6 @@ namespace art { -class TransformToSsa : public HOptimization { - public: - explicit TransformToSsa(HGraph* graph) : HOptimization(graph, true, "ssa transform") {} - - void Run() OVERRIDE { - graph_->BuildDominatorTree(); - graph_->TransformToSSA(); - graph_->FindNaturalLoops(); - } - - private: - DISALLOW_COPY_AND_ASSIGN(TransformToSsa); -}; - static constexpr int kDefaultNumberOfLoops = 2; class SsaBuilder : public HGraphVisitor { diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc index 004af98852..a80ad938a6 100644 --- a/compiler/utils/arena_allocator.cc +++ b/compiler/utils/arena_allocator.cc @@ -189,6 +189,15 @@ Arena* ArenaPool::AllocArena(size_t size) { return ret; } +size_t ArenaPool::GetBytesAllocated() const { + size_t total = 0; + MutexLock lock(Thread::Current(), lock_); + for (Arena* arena = free_arenas_; arena != nullptr; arena = arena->next_) { + total += arena->GetBytesAllocated(); + } + return total; +} + void ArenaPool::FreeArenaChain(Arena* first) { if (UNLIKELY(RUNNING_ON_VALGRIND > 0)) { for (Arena* arena = first; arena != nullptr; arena = arena->next_) { diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h index 6d213991d3..7f5bc9ac4c 100644 --- a/compiler/utils/arena_allocator.h +++ b/compiler/utils/arena_allocator.h @@ -135,6 +135,10 @@ class Arena { return Size() - bytes_allocated_; } + size_t GetBytesAllocated() const { + return bytes_allocated_; + } + private: size_t bytes_allocated_; uint8_t* memory_; @@ -153,11 +157,12 @@ class ArenaPool { public: ArenaPool(); ~ArenaPool(); - Arena* AllocArena(size_t size); - void FreeArenaChain(Arena* first); + Arena* 
AllocArena(size_t size) LOCKS_EXCLUDED(lock_); + void FreeArenaChain(Arena* first) LOCKS_EXCLUDED(lock_); + size_t GetBytesAllocated() const LOCKS_EXCLUDED(lock_); private: - Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; Arena* free_arenas_ GUARDED_BY(lock_); DISALLOW_COPY_AND_ASSIGN(ArenaPool); }; diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 474d8a909e..2a6c58e128 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -663,9 +663,19 @@ void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) { void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) { + cvttss2si(dst, src, false); +} + + +void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF3); - EmitOptionalRex32(dst, src); + if (is64bit) { + // Emit a REX.W prefix if the operand size is 64 bits. + EmitRex64(dst, src); + } else { + EmitOptionalRex32(dst, src); + } EmitUint8(0x0F); EmitUint8(0x2C); EmitXmmRegisterOperand(dst.LowBits(), src); @@ -673,9 +683,19 @@ void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) { void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) { + cvttsd2si(dst, src, false); +} + + +void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); - EmitOptionalRex32(dst, src); + if (is64bit) { + // Emit a REX.W prefix if the operand size is 64 bits. 
+ EmitRex64(dst, src); + } else { + EmitOptionalRex32(dst, src); + } EmitUint8(0x0F); EmitUint8(0x2C); EmitXmmRegisterOperand(dst.LowBits(), src); @@ -1997,6 +2017,10 @@ void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) { EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex()); } +void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) { + EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex()); +} + void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) { uint8_t rex = 0x48 | operand.rex(); // REX.W000 if (dst.NeedsRex()) { diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 6e71e4a5bb..51d1de2c0f 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -340,7 +340,9 @@ class X86_64Assembler FINAL : public Assembler { void cvtsd2ss(XmmRegister dst, XmmRegister src); void cvttss2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version. + void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit); void cvttsd2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version. + void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit); void cvtdq2pd(XmmRegister dst, XmmRegister src); @@ -688,6 +690,7 @@ class X86_64Assembler FINAL : public Assembler { void EmitRex64(CpuRegister dst, CpuRegister src); void EmitRex64(CpuRegister dst, const Operand& operand); void EmitRex64(XmmRegister dst, CpuRegister src); + void EmitRex64(CpuRegister dst, XmmRegister src); // Emit a REX prefix to normalize byte registers plus necessary register bit encodings. void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src); |