Diffstat (limited to 'compiler')
69 files changed, 1503 insertions, 514 deletions
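One change repeated throughout the x86-64 hunks below is replacing blanket 64-bit immediate moves with a CodeGeneratorX86_64::Load64BitValue() helper that picks the cheapest encoding. The standalone C++ sketch that follows mirrors only that selection logic; the enum, function name, and driver are illustrative inventions for this note, not part of ART's assembler API.

#include <cstdint>
#include <cstdio>

// Which x86-64 instruction a 64-bit constant load would use.
enum class LoadKind { kXorl, kMovl, kMovq };

// Mirrors the patch's choice: xorl for zero, movl for positive values that
// fit in 32 bits (it zero-extends and is one byte shorter), movq otherwise.
static LoadKind ChooseLoad64BitValue(int64_t value) {
  if (value == 0) {
    return LoadKind::kXorl;
  } else if (value > 0 && value <= INT32_MAX) {
    return LoadKind::kMovl;
  } else {
    return LoadKind::kMovq;
  }
}

int main() {
  std::printf("%d %d %d\n",
              static_cast<int>(ChooseLoad64BitValue(0)),
              static_cast<int>(ChooseLoad64BitValue(1000)),
              static_cast<int>(ChooseLoad64BitValue(INT64_MIN)));
  return 0;
}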
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 0acdd422df..b78b3d7d75 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -172,7 +172,6 @@ enum ExtendedMIROpcode { kMirOpRangeCheck, kMirOpDivZeroCheck, kMirOpCheck, - kMirOpCheckPart2, kMirOpSelect, // Vector opcodes: diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc index 4f0e9d1b67..915fbcda04 100644 --- a/compiler/dex/gvn_dead_code_elimination.cc +++ b/compiler/dex/gvn_dead_code_elimination.cc @@ -20,6 +20,7 @@ #include "base/bit_vector-inl.h" #include "base/macros.h" +#include "base/allocator.h" #include "compiler_enums.h" #include "dataflow_iterator-inl.h" #include "dex_instruction.h" @@ -75,6 +76,9 @@ inline void GvnDeadCodeElimination::MIRData::RemovePrevChange(int v_reg, MIRData GvnDeadCodeElimination::VRegChains::VRegChains(uint32_t num_vregs, ScopedArenaAllocator* alloc) : num_vregs_(num_vregs), vreg_data_(alloc->AllocArray<VRegValue>(num_vregs, kArenaAllocMisc)), + vreg_high_words_(num_vregs, false, Allocator::GetNoopAllocator(), + BitVector::BitsToWords(num_vregs), + alloc->AllocArray<uint32_t>(BitVector::BitsToWords(num_vregs))), mir_data_(alloc->Adapter()) { mir_data_.reserve(100); } @@ -82,6 +86,7 @@ GvnDeadCodeElimination::VRegChains::VRegChains(uint32_t num_vregs, ScopedArenaAl inline void GvnDeadCodeElimination::VRegChains::Reset() { DCHECK(mir_data_.empty()); std::fill_n(vreg_data_, num_vregs_, VRegValue()); + vreg_high_words_.ClearAllBits(); } void GvnDeadCodeElimination::VRegChains::AddMIRWithDef(MIR* mir, int v_reg, bool wide, @@ -93,24 +98,26 @@ void GvnDeadCodeElimination::VRegChains::AddMIRWithDef(MIR* mir, int v_reg, bool data->wide_def = wide; data->vreg_def = v_reg; - if (vreg_data_[v_reg].change != kNPos && - mir_data_[vreg_data_[v_reg].change].vreg_def + 1 == v_reg) { - data->low_def_over_high_word = true; - } - data->prev_value = vreg_data_[v_reg]; DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + data->prev_value = vreg_data_[v_reg]; + data->low_def_over_high_word = + (vreg_data_[v_reg].change != kNPos) + ? GetMIRData(vreg_data_[v_reg].change)->vreg_def + 1 == v_reg + : vreg_high_words_.IsBitSet(v_reg); vreg_data_[v_reg].value = new_value; vreg_data_[v_reg].change = pos; + vreg_high_words_.ClearBit(v_reg); if (wide) { - if (vreg_data_[v_reg + 1].change != kNPos && - mir_data_[vreg_data_[v_reg + 1].change].vreg_def == v_reg + 1) { - data->high_def_over_low_word = true; - } - data->prev_value_high = vreg_data_[v_reg + 1]; DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_); + data->prev_value_high = vreg_data_[v_reg + 1]; + data->high_def_over_low_word = + (vreg_data_[v_reg + 1].change != kNPos) + ? 
GetMIRData(vreg_data_[v_reg + 1].change)->vreg_def == v_reg + 1 + : !vreg_high_words_.IsBitSet(v_reg + 1); vreg_data_[v_reg + 1].value = new_value; vreg_data_[v_reg + 1].change = pos; + vreg_high_words_.SetBit(v_reg + 1); } } @@ -123,9 +130,17 @@ void GvnDeadCodeElimination::VRegChains::RemoveLastMIRData() { if (data->has_def) { DCHECK_EQ(vreg_data_[data->vreg_def].change, NumMIRs() - 1u); vreg_data_[data->vreg_def] = data->prev_value; + DCHECK(!vreg_high_words_.IsBitSet(data->vreg_def)); + if (data->low_def_over_high_word) { + vreg_high_words_.SetBit(data->vreg_def); + } if (data->wide_def) { DCHECK_EQ(vreg_data_[data->vreg_def + 1].change, NumMIRs() - 1u); vreg_data_[data->vreg_def + 1] = data->prev_value_high; + DCHECK(vreg_high_words_.IsBitSet(data->vreg_def + 1)); + if (data->high_def_over_low_word) { + vreg_high_words_.ClearBit(data->vreg_def + 1); + } } } mir_data_.pop_back(); @@ -169,6 +184,7 @@ void GvnDeadCodeElimination::VRegChains::InsertInitialValueHigh(int v_reg, uint1 uint16_t change = vreg_data_[v_reg].change; if (change == kNPos) { vreg_data_[v_reg].value = value; + vreg_high_words_.SetBit(v_reg); } else { while (true) { MIRData* data = &mir_data_[change]; @@ -208,6 +224,7 @@ void GvnDeadCodeElimination::VRegChains::UpdateInitialVRegValue(int v_reg, bool } } vreg_data_[v_reg].value = old_value; + DCHECK(!vreg_high_words_.IsBitSet(v_reg)); // Keep marked as low word. } } else { DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_); @@ -223,6 +240,7 @@ void GvnDeadCodeElimination::VRegChains::UpdateInitialVRegValue(int v_reg, bool old_value = lvn->GetStartingVregValueNumber(v_reg); } vreg_data_[v_reg].value = old_value; + DCHECK(!vreg_high_words_.IsBitSet(v_reg)); // Keep marked as low word. } if (check_high && vreg_data_[v_reg + 1].value == kNoValue) { uint16_t old_value = lvn->GetStartingVregValueNumber(v_reg + 1); @@ -234,6 +252,7 @@ void GvnDeadCodeElimination::VRegChains::UpdateInitialVRegValue(int v_reg, bool } } vreg_data_[v_reg + 1].value = old_value; + DCHECK(!vreg_high_words_.IsBitSet(v_reg + 1)); // Keep marked as low word. } } } @@ -300,6 +319,8 @@ void GvnDeadCodeElimination::VRegChains::ReplaceChange(uint16_t old_change, uint if (next_change == kNPos) { DCHECK_EQ(vreg_data_[v_reg].change, old_change); vreg_data_[v_reg].change = new_change; + DCHECK_EQ(vreg_high_words_.IsBitSet(v_reg), v_reg == old_data->vreg_def + 1); + // No change in vreg_high_words_. } else { DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), old_change); mir_data_[next_change].SetPrevChange(v_reg, new_change); @@ -316,6 +337,13 @@ void GvnDeadCodeElimination::VRegChains::RemoveChange(uint16_t change) { if (next_change == kNPos) { DCHECK_EQ(vreg_data_[v_reg].change, change); vreg_data_[v_reg] = (data->vreg_def == v_reg) ? data->prev_value : data->prev_value_high; + DCHECK_EQ(vreg_high_words_.IsBitSet(v_reg), v_reg == data->vreg_def + 1); + if (data->vreg_def == v_reg && data->low_def_over_high_word) { + vreg_high_words_.SetBit(v_reg); + } else if (data->vreg_def != v_reg && + (data->high_def_over_low_word || data->prev_value_high.value == kNoValue)) { + vreg_high_words_.ClearBit(v_reg); + } } else { DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), change); mir_data_[next_change].RemovePrevChange(v_reg, data); @@ -533,7 +561,7 @@ MIR* GvnDeadCodeElimination::RenameSRegDefOrCreatePhi(uint16_t def_change, uint1 // Just before we kill mir_to_kill, we need to replace the previous SSA reg assigned to the // same dalvik reg to keep consistency with subsequent instructions. 
However, if there's no - // defining MIR for that dalvik reg, the preserved valus must come from its predecessors + // and we need to create a new Phi (a degenerate Phi if there's only a single predecessor). if (def_change == kNPos) { if (wide) { @@ -541,7 +569,21 @@ MIR* GvnDeadCodeElimination::RenameSRegDefOrCreatePhi(uint16_t def_change, uint1 DCHECK_EQ(mir_graph_->SRegToVReg(new_s_reg) + 1, mir_graph_->SRegToVReg(new_s_reg + 1)); CreatePhi(new_s_reg + 1); // High word Phi. } - return CreatePhi(new_s_reg); + MIR* phi = CreatePhi(new_s_reg); + // If this is a degenerate Phi with all inputs being the same SSA reg, we need to rename its uses. + DCHECK_NE(phi->ssa_rep->num_uses, 0u); + int old_s_reg = phi->ssa_rep->uses[0]; + bool all_same = true; + for (size_t i = 1u, num = phi->ssa_rep->num_uses; i != num; ++i) { + if (phi->ssa_rep->uses[i] != old_s_reg) { + all_same = false; + break; + } + } + if (all_same) { + vreg_chains_.RenameSRegUses(0u, last_change, old_s_reg, new_s_reg, wide); + } + return phi; } else { DCHECK_LT(def_change, last_change); DCHECK_LE(last_change, vreg_chains_.NumMIRs()); diff --git a/compiler/dex/gvn_dead_code_elimination.h b/compiler/dex/gvn_dead_code_elimination.h index bc75a01778..06022db501 100644 --- a/compiler/dex/gvn_dead_code_elimination.h +++ b/compiler/dex/gvn_dead_code_elimination.h @@ -121,6 +121,7 @@ class GvnDeadCodeElimination : public DeletableArenaObject<kArenaAllocMisc> { private: const uint32_t num_vregs_; VRegValue* const vreg_data_; + BitVector vreg_high_words_; ScopedArenaVector<MIRData> mir_data_; }; diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc index f9f0882f08..4f8127338c 100644 --- a/compiler/dex/gvn_dead_code_elimination_test.cc +++ b/compiler/dex/gvn_dead_code_elimination_test.cc @@ -1629,6 +1629,52 @@ TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi1) { } TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi2) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 1000), + DEF_MOVE(4, Instruction::MOVE, 1u, 0u), + DEF_CONST(4, Instruction::CONST, 2u, 1000), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_EQ(value_names_[0], value_names_[1]); + EXPECT_EQ(value_names_[0], value_names_[2]); + + static const bool eliminated[] = { + false, false, true, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that we've created a single-input Phi to replace the CONST 3u. 
+ BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4); + MIR* phi = bb4->first_mir_insn; + ASSERT_TRUE(phi != nullptr); + ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode)); + ASSERT_EQ(1, phi->ssa_rep->num_uses); + EXPECT_EQ(0, phi->ssa_rep->uses[0]); + ASSERT_EQ(1, phi->ssa_rep->num_defs); + EXPECT_EQ(2, phi->ssa_rep->defs[0]); + EXPECT_EQ(0u, phi->dalvikInsn.vA); + MIR* move = phi->next; + ASSERT_TRUE(move != nullptr); + ASSERT_EQ(Instruction::MOVE, move->dalvikInsn.opcode); + ASSERT_EQ(1, move->ssa_rep->num_uses); + EXPECT_EQ(2, move->ssa_rep->uses[0]); + ASSERT_EQ(1, move->ssa_rep->num_defs); + EXPECT_EQ(1, move->ssa_rep->defs[0]); + EXPECT_EQ(1u, move->dalvikInsn.vA); + EXPECT_EQ(0u, move->dalvikInsn.vB); +} + +TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi3) { static const IFieldDef ifields[] = { { 0u, 1u, 0u, false, kDexMemAccessWord }, }; @@ -1850,4 +1896,39 @@ TEST_F(GvnDeadCodeEliminationTestLoop, IFieldLoopVariable) { EXPECT_EQ(2u, phi->dalvikInsn.vA); } +TEST_F(GvnDeadCodeEliminationTestDiamond, LongOverlaps1) { + static const MIRDef mirs[] = { + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u), + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 2u, 1000u), + DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 4u, 0u), + DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 6u, 2u), + DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 8u, 4u), + DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 10u, 6u), + }; + + // The last insn should overlap the first and second. + static const int32_t sreg_to_vreg_map[] = { 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + static const int32_t wide_sregs[] = { 0, 2, 4, 6, 8, 10 }; + MarkAsWideSRegs(wide_sregs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_EQ(value_names_[0], value_names_[1]); + EXPECT_EQ(value_names_[0], value_names_[2]); + EXPECT_EQ(value_names_[0], value_names_[3]); + EXPECT_EQ(value_names_[0], value_names_[4]); + + static const bool eliminated[] = { + false, false, false, false, false, false, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + } // namespace art diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index b4aec98e01..a7ba061984 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -834,9 +834,6 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { // 10B MIR_CHECK 0, - // 10C MIR_CHECKPART2 - 0, - // 10D MIR_SELECT DF_DA | DF_UB, diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 9e3fbbc967..1871f07106 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -52,8 +52,7 @@ const char* MIRGraph::extended_mir_op_names_[kMirOpLast - kMirOpFirst] = { "OpNullCheck", "OpRangeCheck", "OpDivZeroCheck", - "Check1", - "Check2", + "Check", "Select", "ConstVector", "MoveVector", @@ -1508,7 +1507,7 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) { Instruction::Format dalvik_format = Instruction::k10x; // Default to no-operand format. // Handle special cases that recover the original dalvik instruction. 
- if ((opcode == kMirOpCheck) || (opcode == kMirOpCheckPart2)) { + if (opcode == kMirOpCheck) { str.append(extended_mir_op_names_[opcode - kMirOpFirst]); str.append(": "); // Recover the original Dex instruction. @@ -2517,8 +2516,6 @@ int MIR::DecodedInstruction::FlagsOf() const { return Instruction::kContinue | Instruction::kThrow; case kMirOpCheck: return Instruction::kContinue | Instruction::kThrow; - case kMirOpCheckPart2: - return Instruction::kContinue; case kMirOpSelect: return Instruction::kContinue; case kMirOpConstVector: diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc index 5654604797..94be1fd4a5 100644 --- a/compiler/dex/mir_method_info.cc +++ b/compiler/dex/mir_method_info.cc @@ -169,7 +169,8 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, ~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized | (kInvokeTypeMask << kBitSharpTypeBegin)); it->flags_ = other_flags | - (fast_path_flags != 0 ? kFlagFastPath : 0u) | + // String init path is a special always-fast path. + (fast_path_flags != 0 || string_init ? kFlagFastPath : 0u) | ((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) | ((is_intrinsic_or_special & kInlineSpecial) != 0 ? kFlagIsSpecial : 0u) | (static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) | diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index fb68335e6e..86bb69d01e 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1391,22 +1391,6 @@ void Mir2Lir::InitReferenceVRegs(BasicBlock* bb, BitVector* references) { } } } - if (bb->block_type != kEntryBlock && bb->first_mir_insn != nullptr && - static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpCheckPart2) { - // In Mir2Lir::MethodBlockCodeGen() we have artificially moved the throwing - // instruction to the previous block. However, the MIRGraph data used above - // doesn't reflect that, so we still need to process that MIR insn here. - MIR* mir = nullptr; - BasicBlock* pred_bb = bb; - // Traverse empty blocks. - while (mir == nullptr && pred_bb->predecessors.size() == 1u) { - pred_bb = mir_graph_->GetBasicBlock(bb->predecessors[0]); - DCHECK(pred_bb != nullptr); - mir = pred_bb->last_mir_insn; - } - DCHECK(mir != nullptr); - UpdateReferenceVRegsLocal(nullptr, mir, references); - } } bool Mir2Lir::UpdateReferenceVRegsLocal(MIR* mir, MIR* prev_mir, BitVector* references) { diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index e9e9161a1c..e3e87ecb13 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -1187,7 +1187,6 @@ void Mir2Lir::HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir) { case kMirOpRangeCheck: case kMirOpDivZeroCheck: case kMirOpCheck: - case kMirOpCheckPart2: // Ignore these known opcodes break; default: @@ -1276,20 +1275,6 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { head_lir->u.m.def_mask = &kEncodeAll; } - if (opcode == kMirOpCheck) { - // Combine check and work halves of throwing instruction. - MIR* work_half = mir->meta.throw_insn; - mir->dalvikInsn = work_half->dalvikInsn; - mir->optimization_flags = work_half->optimization_flags; - mir->meta = work_half->meta; // Whatever the work_half had, we need to copy it. 
- opcode = work_half->dalvikInsn.opcode; - SSARepresentation* ssa_rep = work_half->ssa_rep; - work_half->ssa_rep = mir->ssa_rep; - mir->ssa_rep = ssa_rep; - work_half->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheckPart2); - work_half->meta.throw_insn = mir; - } - if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { HandleExtendedMethodMIR(bb, mir); continue; diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index 73cfe92c45..7ca438225f 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -403,7 +403,6 @@ static int kAllOpcodes[] = { kMirOpRangeCheck, kMirOpDivZeroCheck, kMirOpCheck, - kMirOpCheckPart2, kMirOpSelect, }; diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 6f2cb25911..a06303d23e 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -138,7 +138,8 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset(); // Check handle scope offset is within frame CHECK_LT(handle_scope_offset.Uint32Value(), frame_size); - // TODO: Insert the read barrier for this load. + // Note this LoadRef() already includes the heap poisoning negation. + // Note this LoadRef() does not include read barrier. It will be handled below. __ LoadRef(main_jni_conv->InterproceduralScratchRegister(), mr_conv->MethodRegister(), mirror::ArtMethod::DeclaringClassOffset()); __ VerifyObject(main_jni_conv->InterproceduralScratchRegister(), false); @@ -189,6 +190,49 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, size_t current_out_arg_size = main_out_arg_size; __ IncreaseFrameSize(main_out_arg_size); + // Call the read barrier for the declaring class loaded from the method for a static call. + // Note that we always have outgoing param space available for at least two params. + if (kUseReadBarrier && is_static) { + ThreadOffset<4> read_barrier32 = QUICK_ENTRYPOINT_OFFSET(4, pReadBarrierJni); + ThreadOffset<8> read_barrier64 = QUICK_ENTRYPOINT_OFFSET(8, pReadBarrierJni); + main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size)); + main_jni_conv->Next(); // Skip JNIEnv. + FrameOffset class_handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset(); + main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size)); + // Pass the handle for the class as the first argument. + if (main_jni_conv->IsCurrentParamOnStack()) { + FrameOffset out_off = main_jni_conv->CurrentParamStackOffset(); + __ CreateHandleScopeEntry(out_off, class_handle_scope_offset, + mr_conv->InterproceduralScratchRegister(), + false); + } else { + ManagedRegister out_reg = main_jni_conv->CurrentParamRegister(); + __ CreateHandleScopeEntry(out_reg, class_handle_scope_offset, + ManagedRegister::NoRegister(), false); + } + main_jni_conv->Next(); + // Pass the current thread as the second argument and call. 
+ if (main_jni_conv->IsCurrentParamInRegister()) { + __ GetCurrentThread(main_jni_conv->CurrentParamRegister()); + if (is_64_bit_target) { + __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier64), + main_jni_conv->InterproceduralScratchRegister()); + } else { + __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier32), + main_jni_conv->InterproceduralScratchRegister()); + } + } else { + __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(), + main_jni_conv->InterproceduralScratchRegister()); + if (is_64_bit_target) { + __ CallFromThread64(read_barrier64, main_jni_conv->InterproceduralScratchRegister()); + } else { + __ CallFromThread32(read_barrier32, main_jni_conv->InterproceduralScratchRegister()); + } + } + main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size)); // Reset. + } + // 6. Call into appropriate JniMethodStart passing Thread* so that transition out of Runnable // can occur. The result is the saved JNI local state that is restored by the exit call. We // abuse the JNI calling convention here, that is guaranteed to support passing 2 pointer diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index dbdcc96fc1..a871a82d95 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -176,7 +176,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(72U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(28U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(111 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); + EXPECT_EQ(112 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); } TEST_F(OatTest, OatHeaderIsValid) { diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 92fa6db507..b2b54965b5 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -281,15 +281,22 @@ class ArrayAccessInsideLoopFinder : public ValueObject { return false; } + static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) { + for (size_t i = 0, e = loop_info->GetBackEdges().Size(); i < e; ++i) { + HBasicBlock* back_edge = loop_info->GetBackEdges().Get(i); + if (!block->Dominates(back_edge)) { + return false; + } + } + return true; + } + void Run() { HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation(); - // Must be simplified loop. - DCHECK_EQ(loop_info->GetBackEdges().Size(), 1U); for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) { HBasicBlock* block = it_loop.Current(); DCHECK(block->IsInLoop()); - HBasicBlock* back_edge = loop_info->GetBackEdges().Get(0); - if (!block->Dominates(back_edge)) { + if (!DominatesAllBackEdges(block, loop_info)) { // In order not to trigger deoptimization unnecessarily, make sure // that all array accesses collected are really executed in the loop. // For array accesses in a branch inside the loop, don't collect the @@ -1151,9 +1158,26 @@ class BCEVisitor : public HGraphVisitor { bounds_check->GetBlock()->RemoveInstruction(bounds_check); } + static bool HasSameInputAtBackEdges(HPhi* phi) { + DCHECK(phi->IsLoopHeaderPhi()); + // Start with input 1. Input 0 is from the incoming block. 
+ HInstruction* input1 = phi->InputAt(1); + DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge( + *phi->GetBlock()->GetPredecessors().Get(1))); + for (size_t i = 2, e = phi->InputCount(); i < e; ++i) { + DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge( + *phi->GetBlock()->GetPredecessors().Get(i))); + if (input1 != phi->InputAt(i)) { + return false; + } + } + return true; + } + void VisitPhi(HPhi* phi) { - if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) { - DCHECK_EQ(phi->InputCount(), 2U); + if (phi->IsLoopHeaderPhi() + && (phi->GetType() == Primitive::kPrimInt) + && HasSameInputAtBackEdges(phi)) { HInstruction* instruction = phi->InputAt(1); HInstruction *left; int32_t increment; diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 97be778dbd..163458f75c 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -42,7 +42,7 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -147,7 +147,7 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -219,7 +219,7 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -291,7 +291,7 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -364,7 +364,7 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator, int initial, int increment, IfCondition cond = kCondGE) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); @@ -501,7 +501,7 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator, int initial, int increment = -1, IfCondition cond = kCondLE) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); @@ -632,7 +632,7 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, int initial, int increment, IfCondition cond) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); @@ -743,7 +743,7 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator, HInstruction** bounds_check, int initial, IfCondition cond = kCondGE) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); graph->SetHasBoundsChecks(true); HBasicBlock* 
entry = new (allocator) HBasicBlock(graph); @@ -868,7 +868,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 0f44af07b8..a5c6f23343 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -282,7 +282,10 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { // To avoid splitting blocks, we compute ahead of time the instructions that // start a new block, and create these blocks. - ComputeBranchTargets(code_ptr, code_end, &number_of_branches); + if (!ComputeBranchTargets(code_ptr, code_end, &number_of_branches)) { + MaybeRecordStat(MethodCompilationStat::kNotCompiledBranchOutsideMethodCode); + return false; + } // Note that the compiler driver is null when unit testing. if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) { @@ -349,7 +352,7 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) { current_block_ = block; } -void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, +bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end, size_t* number_of_branches) { branch_targets_.SetSize(code_end - code_ptr); @@ -374,7 +377,14 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, } dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); - if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { + + if (code_ptr >= code_end) { + if (instruction.CanFlowThrough()) { + // In the normal case we should never hit this but someone can artificially forge a dex + // file to fall-through out the method code. In this case we bail out compilation. + return false; + } + } else if (FindBlockStartingAt(dex_pc) == nullptr) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); } @@ -406,7 +416,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, // Fall-through. Add a block if there is more code afterwards. dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); - if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { + if (code_ptr >= code_end) { + // In the normal case we should never hit this but someone can artificially forge a dex + // file to fall-through out the method code. In this case we bail out compilation. + // (A switch can fall-through so we don't need to check CanFlowThrough().) + return false; + } else if (FindBlockStartingAt(dex_pc) == nullptr) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); } @@ -415,6 +430,7 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, dex_pc += instruction.SizeInCodeUnits(); } } + return true; } HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t index) const { diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index dc6d97eb0c..36503ce43a 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -88,7 +88,10 @@ class HGraphBuilder : public ValueObject { // the newly created blocks. // As a side effect, also compute the number of dex instructions, blocks, and // branches. 
- void ComputeBranchTargets(const uint16_t* start, + // Returns true if all the branches fall inside the method code, false otherwise. + // (In normal cases this should always return true but someone can artificially + // create a code unit in which branches fall-through out of it). + bool ComputeBranchTargets(const uint16_t* start, const uint16_t* end, size_t* number_of_branches); void MaybeUpdateCurrentBlock(size_t index); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index cfe121e0ec..0e776b31f7 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -100,11 +100,11 @@ static bool CheckTypeConsistency(HInstruction* instruction) { for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) { if (environment->GetInstructionAt(i) != nullptr) { Primitive::Type type = environment->GetInstructionAt(i)->GetType(); - DCHECK(CheckType(type, locations->GetEnvironmentAt(i))) - << type << " " << locations->GetEnvironmentAt(i); + DCHECK(CheckType(type, environment->GetLocationAt(i))) + << type << " " << environment->GetLocationAt(i); } else { - DCHECK(locations->GetEnvironmentAt(i).IsInvalid()) - << locations->GetEnvironmentAt(i); + DCHECK(environment->GetLocationAt(i).IsInvalid()) + << environment->GetLocationAt(i); } } return true; @@ -680,6 +680,11 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, locations->GetStackMask(), environment_size, inlining_depth); + if (environment != nullptr) { + // TODO: Handle parent environment. + DCHECK(environment->GetParent() == nullptr); + DCHECK_EQ(environment->GetDexPc(), dex_pc); + } // Walk over the environment, and record the location of dex registers. for (size_t i = 0; i < environment_size; ++i) { @@ -689,7 +694,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, continue; } - Location location = locations->GetEnvironmentAt(i); + Location location = environment->GetLocationAt(i); switch (location.GetKind()) { case Location::kConstant: { DCHECK_EQ(current, location.GetConstant()); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index e4c37deb8b..f56e446605 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -112,6 +112,10 @@ class SuspendCheckSlowPathARM : public SlowPathCodeARM { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. 
@@ -3539,8 +3543,18 @@ void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction) void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathARM* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathARM*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } __ LoadFromOffset( kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value()); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 9e02a1d850..b1cb8802b3 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -285,6 +285,10 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. @@ -975,14 +979,12 @@ void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset, BlockPoolsScope block_pools(GetVIXLAssembler()); __ Ldr(lr, MemOperand(tr, entry_point_offset)); __ Blr(lr); - if (instruction != nullptr) { - RecordPcInfo(instruction, dex_pc, slow_path); - DCHECK(instruction->IsSuspendCheck() - || instruction->IsBoundsCheck() - || instruction->IsNullCheck() - || instruction->IsDivZeroCheck() - || !IsLeafMethod()); - } + RecordPcInfo(instruction, dex_pc, slow_path); + DCHECK(instruction->IsSuspendCheck() + || instruction->IsBoundsCheck() + || instruction->IsNullCheck() + || instruction->IsDivZeroCheck() + || !IsLeafMethod()); } void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, @@ -1034,8 +1036,19 @@ void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathARM64* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); Register temp = temps.AcquireW(); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 8aa77969fc..2848a48a64 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -153,6 +153,10 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + 
private: HSuspendCheck* const instruction_; HBasicBlock* const successor_; @@ -809,7 +813,6 @@ void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) { HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -2827,7 +2830,11 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) { Register low = loc.AsRegisterPairLow<Register>(); Register high = loc.AsRegisterPairHigh<Register>(); - if (shift == 32) { + if (shift == 1) { + // This is just an addition. + __ addl(low, low); + __ adcl(high, high); + } else if (shift == 32) { // Shift by 32 is easy. High gets low, and low gets 0. codegen_->EmitParallelMoves( loc.ToLow(), @@ -3993,8 +4000,19 @@ void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathX86* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + __ fs()->cmpw(Address::Absolute( Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0)); if (successor == nullptr) { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5ac68668ba..e633970279 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -99,7 +99,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 { if (is_div_) { __ negq(cpu_reg_); } else { - __ movq(cpu_reg_, Immediate(0)); + __ xorl(cpu_reg_, cpu_reg_); } } __ jmp(GetExitLabel()); @@ -136,6 +136,10 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; HBasicBlock* const successor_; @@ -671,7 +675,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { DCHECK(constant->IsLongConstant()); value = constant->AsLongConstant()->GetValue(); } - __ movq(CpuRegister(TMP), Immediate(value)); + Load64BitValue(CpuRegister(TMP), value); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } else { DCHECK(source.IsDoubleStackSlot()); @@ -704,9 +708,9 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction, } else if (const_to_move->IsLongConstant()) { int64_t value = const_to_move->AsLongConstant()->GetValue(); if (location.IsRegister()) { - __ movq(location.AsRegister<CpuRegister>(), Immediate(value)); + Load64BitValue(location.AsRegister<CpuRegister>(), value); } else if (location.IsDoubleStackSlot()) { - __ movq(CpuRegister(TMP), Immediate(value)); + Load64BitValue(CpuRegister(TMP), value); __ movq(Address(CpuRegister(RSP), 
location.GetStackIndex()), CpuRegister(TMP)); } else { DCHECK(location.IsConstant()); @@ -771,7 +775,6 @@ void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) { HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -956,7 +959,7 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) { LocationSummary* locations = comp->GetLocations(); CpuRegister reg = locations->Out().AsRegister<CpuRegister>(); // Clear register: setcc only sets the low byte. - __ xorq(reg, reg); + __ xorl(reg, reg); Location lhs = locations->InAt(0); Location rhs = locations->InAt(1); if (rhs.IsRegister()) { @@ -1419,8 +1422,8 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo size_t class_offset = mirror::Object::ClassOffset().SizeValue(); // Set the hidden argument. - __ movq(invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(), - Immediate(invoke->GetDexMethodIndex())); + CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex()); // temp = object->GetClass(); if (receiver.IsStackSlot()) { @@ -1856,7 +1859,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; - __ movq(output, Immediate(kPrimLongMax)); + codegen_->Load64BitValue(output, kPrimLongMax); // temp = long-to-float(output) __ cvtsi2ss(temp, output, true); // if input >= temp goto done @@ -1869,7 +1872,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver __ jmp(&done); __ Bind(&nan); // output = 0 - __ xorq(output, output); + __ xorl(output, output); __ Bind(&done); break; } @@ -1881,7 +1884,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; - __ movq(output, Immediate(kPrimLongMax)); + codegen_->Load64BitValue(output, kPrimLongMax); // temp = long-to-double(output) __ cvtsi2sd(temp, output, true); // if input >= temp goto done @@ -1894,7 +1897,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver __ jmp(&done); __ Bind(&nan); // output = 0 - __ xorq(output, output); + __ xorl(output, output); __ Bind(&done); break; } @@ -2483,7 +2486,7 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr case Primitive::kPrimLong: { if (instruction->IsRem()) { - __ xorq(output_register, output_register); + __ xorl(output_register, output_register); } else { __ movq(output_register, input_register); if (imm == -1) { @@ -2527,7 +2530,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); - __ movq(rdx, Immediate(std::abs(imm) - 1)); + codegen_->Load64BitValue(rdx, std::abs(imm) - 1); __ addq(rdx, numerator); __ testq(numerator, numerator); __ cmov(kGreaterEqual, rdx, numerator); @@ -2624,7 +2627,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat __ movq(numerator, rax); // RAX = magic - __ movq(rax, Immediate(magic)); + codegen_->Load64BitValue(rax, magic); // RDX:RAX = magic * 
numerator __ imulq(numerator); @@ -2653,8 +2656,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat if (IsInt<32>(imm)) { __ imulq(rdx, Immediate(static_cast<int32_t>(imm))); } else { - __ movq(numerator, Immediate(imm)); - __ imulq(rdx, numerator); + __ imulq(rdx, codegen_->LiteralInt64Address(imm)); } __ subq(rax, rdx); @@ -3020,8 +3022,8 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { InvokeRuntimeCallingConvention calling_convention; codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); - __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex())); - + codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), + instruction->GetTypeIndex()); __ gs()->call( Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true)); @@ -3042,7 +3044,8 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(2))); - __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex())); + codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), + instruction->GetTypeIndex()); __ gs()->call( Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true)); @@ -3864,8 +3867,19 @@ void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instructio void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathX86_64* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + __ gs()->cmpw(Address::Absolute( Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0)); if (successor == nullptr) { @@ -3938,45 +3952,42 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { } else if (constant->IsLongConstant()) { int64_t value = constant->AsLongConstant()->GetValue(); if (destination.IsRegister()) { - __ movq(destination.AsRegister<CpuRegister>(), Immediate(value)); + codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; - __ movq(CpuRegister(TMP), Immediate(value)); + codegen_->Load64BitValue(CpuRegister(TMP), value); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } } else if (constant->IsFloatConstant()) { float fp_value = constant->AsFloatConstant()->GetValue(); int32_t value = bit_cast<int32_t, float>(fp_value); - Immediate imm(value); if (destination.IsFpuRegister()) { XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); if (value == 0) { // easy FP 0.0. 
__ xorps(dest, dest); } else { - __ movl(CpuRegister(TMP), imm); - __ movd(dest, CpuRegister(TMP)); + __ movss(dest, codegen_->LiteralFloatAddress(fp_value)); } } else { DCHECK(destination.IsStackSlot()) << destination; + Immediate imm(value); __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); } } else { DCHECK(constant->IsDoubleConstant()) << constant->DebugName(); double fp_value = constant->AsDoubleConstant()->GetValue(); int64_t value = bit_cast<int64_t, double>(fp_value); - Immediate imm(value); if (destination.IsFpuRegister()) { XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); if (value == 0) { __ xorpd(dest, dest); } else { - __ movq(CpuRegister(TMP), imm); - __ movd(dest, CpuRegister(TMP)); + __ movsd(dest, codegen_->LiteralDoubleAddress(fp_value)); } } else { DCHECK(destination.IsDoubleStackSlot()) << destination; - __ movq(CpuRegister(TMP), imm); + codegen_->Load64BitValue(CpuRegister(TMP), value); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } } @@ -4435,6 +4446,17 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) { LOG(FATAL) << "Unreachable"; } +void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) { + if (value == 0) { + __ xorl(dest, dest); + } else if (value > 0 && IsInt<32>(value)) { + // We can use a 32 bit move, as it will zero-extend and is one byte shorter. + __ movl(dest, Immediate(static_cast<int32_t>(value))); + } else { + __ movq(dest, Immediate(value)); + } +} + void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { // Generate the constant area if needed. X86_64Assembler* assembler = GetAssembler(); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 13f9c46b5e..480ea6b9c9 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -282,6 +282,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { Address LiteralInt32Address(int32_t v); Address LiteralInt64Address(int64_t v); + // Load a 64 bit value into a register in the most efficient manner. + void Load64BitValue(CpuRegister dest, int64_t value); + private: // Labels for each block that will be compiled. 
GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 94f56e5d3e..bfed1a89de 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -225,7 +225,7 @@ static void RunCodeOptimized(HGraph* graph, static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) { ArenaPool pool; ArenaAllocator arena(&pool); - HGraph* graph = new (&arena) HGraph(&arena); + HGraph* graph = CreateGraph(&arena); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); @@ -238,7 +238,7 @@ static void TestCode(const uint16_t* data, bool has_result = false, int32_t expe static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) { ArenaPool pool; ArenaAllocator arena(&pool); - HGraph* graph = new (&arena) HGraph(&arena); + HGraph* graph = CreateGraph(&arena); HGraphBuilder builder(graph, Primitive::kPrimLong); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); @@ -504,7 +504,7 @@ TEST(CodegenTest, NonMaterializedCondition) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -623,7 +623,7 @@ TEST(CodegenTest, MaterializedCondition1) { for (size_t i = 0; i < arraysize(lhs); i++) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry_block); @@ -669,7 +669,7 @@ TEST(CodegenTest, MaterializedCondition2) { for (size_t i = 0; i < arraysize(lhs); i++) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry_block); diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index b7a92b5ae5..5a1d9b488f 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -28,6 +28,7 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor { void VisitShift(HBinaryOperation* shift); void VisitAnd(HAnd* instruction) OVERRIDE; + void VisitCompare(HCompare* instruction) OVERRIDE; void VisitMul(HMul* instruction) OVERRIDE; void VisitOr(HOr* instruction) OVERRIDE; void VisitRem(HRem* instruction) OVERRIDE; @@ -108,6 +109,26 @@ void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) { } } +void InstructionWithAbsorbingInputSimplifier::VisitCompare(HCompare* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + if (input_cst != nullptr) { + HInstruction* input_value = instruction->GetLeastConstantLeft(); + if (Primitive::IsFloatingPointType(input_value->GetType()) && + ((input_cst->IsFloatConstant() && input_cst->AsFloatConstant()->IsNaN()) || + (input_cst->IsDoubleConstant() && input_cst->AsDoubleConstant()->IsNaN()))) { + // Replace code looking like + // CMP{G,L} dst, src, NaN + // with + // CONSTANT +1 (gt bias) + // or + // CONSTANT -1 (lt bias) + instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimInt, + 
(instruction->IsGtBias() ? 1 : -1))); + instruction->GetBlock()->RemoveInstruction(instruction); + } + } +} + void InstructionWithAbsorbingInputSimplifier::VisitMul(HMul* instruction) { HConstant* input_cst = instruction->GetConstantRight(); Primitive::Type type = instruction->GetType(); diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index cd427c5ed8..6fbe75e802 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -47,6 +47,12 @@ static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) { } } +static void MarkLoopHeadersContaining(const HBasicBlock& block, ArenaBitVector* set) { + for (HLoopInformationOutwardIterator it(block); !it.Done(); it.Advance()) { + set->SetBit(it.Current()->GetHeader()->GetBlockId()); + } +} + void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) { if (stats_ != nullptr) { stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction, @@ -58,18 +64,24 @@ void HDeadCodeElimination::RemoveDeadBlocks() { // Classify blocks as reachable/unreachable. ArenaAllocator* allocator = graph_->GetArena(); ArenaBitVector live_blocks(allocator, graph_->GetBlocks().Size(), false); + ArenaBitVector affected_loops(allocator, graph_->GetBlocks().Size(), false); + MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks); - // Remove all dead blocks. Process blocks in post-order, because removal needs - // the block's chain of dominators. + // Remove all dead blocks. Iterate in post order because removal needs the + // block's chain of dominators and nested loops need to be updated from the + // inside out. for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); - if (live_blocks.IsBitSet(block->GetBlockId())) { - // If this block is part of a loop that is being dismantled, we need to - // update its loop information. 
- block->UpdateLoopInformation(); + int id = block->GetBlockId(); + if (live_blocks.IsBitSet(id)) { + if (affected_loops.IsBitSet(id)) { + DCHECK(block->IsLoopHeader()); + block->GetLoopInformation()->Update(); + } } else { MaybeRecordDeadBlock(block); + MarkLoopHeadersContaining(*block, &affected_loops); block->DisconnectAndDelete(); } } diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 0bea0fc1c2..59a57c4345 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -31,13 +31,13 @@ class HDeadCodeElimination : public HOptimization { public: HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats = nullptr, - const char* name = kDeadCodeEliminationPassName) + const char* name = kInitialDeadCodeEliminationPassName) : HOptimization(graph, true, name, stats) {} void Run() OVERRIDE; - static constexpr const char* kDeadCodeEliminationPassName = - "dead_code_elimination"; + static constexpr const char* kInitialDeadCodeEliminationPassName = "dead_code_elimination"; + static constexpr const char* kFinalDeadCodeEliminationPassName = "dead_code_elimination_final"; private: void MaybeRecordDeadBlock(HBasicBlock* block); diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 61a7697301..78ae1dd960 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -27,7 +27,7 @@ namespace art { static void TestCode(const uint16_t* data, const int* blocks, size_t blocks_length) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index 2bfecc696a..29aa97a83a 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -28,7 +28,7 @@ namespace art { static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); @@ -235,14 +235,13 @@ TEST(FindLoopsTest, Loop4) { TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // pre header - const int blocks2[] = {2, 3, 4, 5, 8}; - TestBlock(graph, 2, true, 2, blocks2, 5); // loop header + const int blocks2[] = {2, 3, 4, 5}; + TestBlock(graph, 2, true, 2, blocks2, arraysize(blocks2)); // loop header TestBlock(graph, 3, false, 2); // block in loop - TestBlock(graph, 4, false, 2); // original back edge - TestBlock(graph, 5, false, 2); // original back edge + TestBlock(graph, 4, false, 2); // back edge + TestBlock(graph, 5, false, 2); // back edge TestBlock(graph, 6, false, -1); // return block TestBlock(graph, 7, false, -1); // exit block - TestBlock(graph, 8, false, 2); // synthesized back edge } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 8ea8f3cd79..fd28f0b83f 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -288,6 +288,7 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) { void SSAChecker::CheckLoop(HBasicBlock* loop_header) { 
int id = loop_header->GetBlockId(); + HLoopInformation* loop_information = loop_header->GetLoopInformation(); // Ensure the pre-header block is first in the list of // predecessors of a loop header. @@ -297,57 +298,48 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { id)); } - // Ensure the loop header has only two predecessors and that only the - // second one is a back edge. + // Ensure the loop header has only one incoming branch and the remaining + // predecessors are back edges. size_t num_preds = loop_header->GetPredecessors().Size(); if (num_preds < 2) { AddError(StringPrintf( "Loop header %d has less than two predecessors: %zu.", id, num_preds)); - } else if (num_preds > 2) { - AddError(StringPrintf( - "Loop header %d has more than two predecessors: %zu.", - id, - num_preds)); } else { - HLoopInformation* loop_information = loop_header->GetLoopInformation(); HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0); if (loop_information->IsBackEdge(*first_predecessor)) { AddError(StringPrintf( "First predecessor of loop header %d is a back edge.", id)); } - HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1); - if (!loop_information->IsBackEdge(*second_predecessor)) { - AddError(StringPrintf( - "Second predecessor of loop header %d is not a back edge.", - id)); + for (size_t i = 1, e = loop_header->GetPredecessors().Size(); i < e; ++i) { + HBasicBlock* predecessor = loop_header->GetPredecessors().Get(i); + if (!loop_information->IsBackEdge(*predecessor)) { + AddError(StringPrintf( + "Loop header %d has multiple incoming (non back edge) blocks.", + id)); + } } } - const ArenaBitVector& loop_blocks = loop_header->GetLoopInformation()->GetBlocks(); + const ArenaBitVector& loop_blocks = loop_information->GetBlocks(); - // Ensure there is only one back edge per loop. - size_t num_back_edges = - loop_header->GetLoopInformation()->GetBackEdges().Size(); + // Ensure back edges belong to the loop. + size_t num_back_edges = loop_information->GetBackEdges().Size(); if (num_back_edges == 0) { AddError(StringPrintf( "Loop defined by header %d has no back edge.", id)); - } else if (num_back_edges > 1) { - AddError(StringPrintf( - "Loop defined by header %d has several back edges: %zu.", - id, - num_back_edges)); } else { - DCHECK_EQ(num_back_edges, 1u); - int back_edge_id = loop_header->GetLoopInformation()->GetBackEdges().Get(0)->GetBlockId(); - if (!loop_blocks.IsBitSet(back_edge_id)) { - AddError(StringPrintf( - "Loop defined by header %d has an invalid back edge %d.", - id, - back_edge_id)); + for (size_t i = 0; i < num_back_edges; ++i) { + int back_edge_id = loop_information->GetBackEdges().Get(i)->GetBlockId(); + if (!loop_blocks.IsBitSet(back_edge_id)) { + AddError(StringPrintf( + "Loop defined by header %d has an invalid back edge %d.", + id, + back_edge_id)); + } } } @@ -394,8 +386,9 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { // Ensure an instruction having an environment is dominated by the // instructions contained in the environment. 
- HEnvironment* environment = instruction->GetEnvironment(); - if (environment != nullptr) { + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* env_instruction = environment->GetInstructionAt(i); if (env_instruction != nullptr diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc index 923468ff16..eca0d9344f 100644 --- a/compiler/optimizing/graph_checker_test.cc +++ b/compiler/optimizing/graph_checker_test.cc @@ -30,7 +30,7 @@ namespace art { * 1: Exit */ HGraph* CreateSimpleCFG(ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry_block = new (allocator) HBasicBlock(graph); entry_block->AddInstruction(new (allocator) HGoto()); graph->AddBlock(entry_block); diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index 50398b4790..59d50926ad 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -73,7 +73,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock1) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* if_true = createGotoBlock(graph, &allocator); @@ -108,7 +108,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock2) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* if_false = createGotoBlock(graph, &allocator); @@ -143,7 +143,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); @@ -178,7 +178,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); @@ -213,7 +213,7 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders1) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* first_if_block = createIfBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); @@ -252,7 +252,7 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders2) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* first_if_block = createIfBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, 
&allocator); @@ -288,7 +288,7 @@ TEST(GraphTest, InsertInstructionBefore) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* block = createGotoBlock(graph, &allocator); HInstruction* got = block->GetLastInstruction(); ASSERT_TRUE(got->IsControlFlow()); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index ca9cbc3d01..f5c630bf97 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -17,6 +17,7 @@ #include "graph_visualizer.h" #include "code_generator.h" +#include "dead_code_elimination.h" #include "licm.h" #include "nodes.h" #include "optimization.h" @@ -211,17 +212,22 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << "]"; } if (instruction->HasEnvironment()) { - HEnvironment* env = instruction->GetEnvironment(); - output_ << " (env: [ "; - for (size_t i = 0, e = env->Size(); i < e; ++i) { - HInstruction* insn = env->GetInstructionAt(i); - if (insn != nullptr) { - output_ << GetTypeId(insn->GetType()) << insn->GetId() << " "; - } else { - output_ << " _ "; + output_ << " (env:"; + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { + output_ << " [ "; + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + HInstruction* insn = environment->GetInstructionAt(i); + if (insn != nullptr) { + output_ << GetTypeId(insn->GetType()) << insn->GetId() << " "; + } else { + output_ << " _ "; + } } + output_ << "]"; } - output_ << "])"; + output_ << ")"; } if (IsPass(SsaLivenessAnalysis::kLivenessPassName) && is_after_pass_ @@ -248,7 +254,8 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } } output_ << " (liveness: " << instruction->GetLifetimePosition() << ")"; - } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName)) { + } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName) + || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)) { output_ << " ( loop_header:"; HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); if (info == nullptr) { diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index a81d49aa0c..c3ce7e142a 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -29,7 +29,7 @@ TEST(GVNTest, LocalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -78,7 +78,7 @@ TEST(GVNTest, GlobalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -133,7 +133,7 @@ TEST(GVNTest, LoopFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -220,7 +220,7 @@ TEST(GVNTest, LoopSideEffects) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); 
HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index ada32db047..afffc7ab4f 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -170,7 +170,11 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, nullptr); HGraph* callee_graph = new (graph_->GetArena()) HGraph( - graph_->GetArena(), graph_->IsDebuggable(), graph_->GetCurrentInstructionId()); + graph_->GetArena(), + caller_dex_file, + method_index, + graph_->IsDebuggable(), + graph_->GetCurrentInstructionId()); OptimizingCompilerStats inline_stats; HGraphBuilder builder(callee_graph, diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index e79d4f4bdc..46fad17b8f 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -137,13 +137,25 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); - if ((input_cst != nullptr) && input_cst->IsZero()) { - // Replace code looking like - // SHL dst, src, 0 - // with - // src - instruction->ReplaceWith(input_other); - instruction->GetBlock()->RemoveInstruction(instruction); + if (input_cst != nullptr) { + if (input_cst->IsZero()) { + // Replace code looking like + // SHL dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (instruction->IsShl() && input_cst->IsOne()) { + // Replace Shl looking like + // SHL dst, src, 1 + // with + // ADD dst, src, src + HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(), + input_other, + input_other); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add); + RecordSimplification(); + } } } diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 7f7b450003..dccfe9a0ca 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -850,6 +850,94 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void GenerateVisitStringIndexOf(HInvoke* invoke, + ArmAssembler* assembler, + CodeGeneratorARM* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + Register tmp_reg = locations->GetTemp(0).AsRegister<Register>(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. + SlowPathCodeARM* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > + std::numeric_limits<uint16_t>::max()) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. 
+ slow_path = new (allocator) IntrinsicSlowPathARM(invoke); + codegen->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + Register char_reg = locations->InAt(1).AsRegister<Register>(); + __ LoadImmediate(tmp_reg, std::numeric_limits<uint16_t>::max()); + __ cmp(char_reg, ShifterOperand(tmp_reg)); + slow_path = new (allocator) IntrinsicSlowPathARM(invoke); + codegen->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel(), HI); + } + + if (start_at_zero) { + DCHECK_EQ(tmp_reg, R2); + // Start-index = 0. + __ LoadImmediate(tmp_reg, 0); + } + + __ LoadFromOffset(kLoadWord, LR, TR, + QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pIndexOf).Int32Value()); + __ blx(LR); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetOut(Location::RegisterLocation(R0)); + + // Need a temp for slow-path codepoint compare, and need to send start-index=0. + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); +} + +void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetOut(Location::RegisterLocation(R0)); + + // Need a temp for slow-path codepoint compare. + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); +} + void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCall, @@ -951,8 +1039,6 @@ UNIMPLEMENTED_INTRINSIC(MathRoundDouble) // Could be done by changing rounding UNIMPLEMENTED_INTRINSIC(MathRoundFloat) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure. 
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index ca3de99092..2c4fab0465 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -993,6 +993,91 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void GenerateVisitStringIndexOf(HInvoke* invoke, + vixl::MacroAssembler* masm, + CodeGeneratorARM64* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + Register tmp_reg = WRegisterFrom(locations->GetTemp(0)); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. + SlowPathCodeARM64* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > 0xFFFFU) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathARM64(invoke); + codegen->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + Register char_reg = WRegisterFrom(locations->InAt(1)); + __ Mov(tmp_reg, 0xFFFF); + __ Cmp(char_reg, Operand(tmp_reg)); + slow_path = new (allocator) IntrinsicSlowPathARM64(invoke); + codegen->AddSlowPath(slow_path); + __ B(hi, slow_path->GetEntryLabel()); + } + + if (start_at_zero) { + // Start-index = 0. + __ Mov(tmp_reg, 0); + } + + __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pIndexOf).Int32Value())); + __ Blr(lr); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + + // Need a temp for slow-path codepoint compare, and need to send start_index=0. + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); +} + +void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. 
+ InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + + // Need a temp for slow-path codepoint compare. + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), false); +} + void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCall, @@ -1080,8 +1165,6 @@ void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED } UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 1eef1eff0b..28b7a07cf9 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -16,6 +16,8 @@ #include "intrinsics_x86.h" +#include <limits> + #include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "entrypoints/quick/quick_entrypoints.h" @@ -124,11 +126,8 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) { // restored! class IntrinsicSlowPathX86 : public SlowPathCodeX86 { public: - explicit IntrinsicSlowPathX86(HInvoke* invoke, Register temp) - : invoke_(invoke) { - // The temporary register has to be EAX for x86 invokes. - DCHECK_EQ(temp, EAX); - } + explicit IntrinsicSlowPathX86(HInvoke* invoke) + : invoke_(invoke) { } void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in); @@ -880,8 +879,6 @@ void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); - // Needs to be EAX for the invoke. - locations->AddTemp(Location::RegisterLocation(EAX)); } void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) { @@ -901,8 +898,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) { // TODO: For simplicity, the index parameter is requested in a register, so different from Quick // we will not optimize the code for constants (which would save a register). - SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86( - invoke, locations->GetTemp(0).AsRegister<Register>()); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); X86Assembler* assembler = GetAssembler(); @@ -926,8 +922,6 @@ void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) { locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(EAX)); - // Needs to be EAX for the invoke. 
- locations->AddTemp(Location::RegisterLocation(EAX)); } void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { @@ -939,8 +933,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { Register argument = locations->InAt(1).AsRegister<Register>(); __ testl(argument, argument); - SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86( - invoke, locations->GetTemp(0).AsRegister<Register>()); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -948,6 +941,158 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void CreateStringIndexOfLocations(HInvoke* invoke, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = new (allocator) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + // The data needs to be in EDI for scasw. So request that the string is there, anyways. + locations->SetInAt(0, Location::RegisterLocation(EDI)); + // If we look for a constant char, we'll still have to copy it into EAX. So just request the + // allocator to do that, anyways. We can still do the constant check by checking the parameter + // of the instruction explicitly. + // Note: This works as we don't clobber EAX anywhere. + locations->SetInAt(1, Location::RegisterLocation(EAX)); + if (!start_at_zero) { + locations->SetInAt(2, Location::RequiresRegister()); // The starting index. + } + // As we clobber EDI during execution anyways, also use it as the output. + locations->SetOut(Location::SameAsFirstInput()); + + // repne scasw uses ECX as the counter. + locations->AddTemp(Location::RegisterLocation(ECX)); + // Need another temporary to be able to compute the result. + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenerateStringIndexOf(HInvoke* invoke, + X86Assembler* assembler, + CodeGeneratorX86* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + Register string_obj = locations->InAt(0).AsRegister<Register>(); + Register search_value = locations->InAt(1).AsRegister<Register>(); + Register counter = locations->GetTemp(0).AsRegister<Register>(); + Register string_length = locations->GetTemp(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + // Check our assumptions for registers. + DCHECK_EQ(string_obj, EDI); + DCHECK_EQ(search_value, EAX); + DCHECK_EQ(counter, ECX); + DCHECK_EQ(out, EDI); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. + SlowPathCodeX86* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > + std::numeric_limits<uint16_t>::max()) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. 
+ slow_path = new (allocator) IntrinsicSlowPathX86(invoke); + codegen->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max())); + slow_path = new (allocator) IntrinsicSlowPathX86(invoke); + codegen->AddSlowPath(slow_path); + __ j(kAbove, slow_path->GetEntryLabel()); + } + + // From here down, we know that we are looking for a char that fits in 16 bits. + // Location of reference to data array within the String object. + int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + // Location of count within the String object. + int32_t count_offset = mirror::String::CountOffset().Int32Value(); + + // Load string length, i.e., the count field of the string. + __ movl(string_length, Address(string_obj, count_offset)); + + // Do a zero-length check. + // TODO: Support jecxz. + Label not_found_label; + __ testl(string_length, string_length); + __ j(kEqual, &not_found_label); + + if (start_at_zero) { + // Number of chars to scan is the same as the string length. + __ movl(counter, string_length); + + // Move to the start of the string. + __ addl(string_obj, Immediate(value_offset)); + } else { + Register start_index = locations->InAt(2).AsRegister<Register>(); + + // Do a start_index check. + __ cmpl(start_index, string_length); + __ j(kGreaterEqual, &not_found_label); + + // Ensure we have a start index >= 0; + __ xorl(counter, counter); + __ cmpl(start_index, Immediate(0)); + __ cmovl(kGreater, counter, start_index); + + // Move to the start of the string: string_obj + value_offset + 2 * start_index. + __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + + // Now update ecx (the repne scasw work counter). We have string.length - start_index left to + // compare. + __ negl(counter); + __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0)); + } + + // Everything is set up for repne scasw: + // * Comparison address in EDI. + // * Counter in ECX. + __ repne_scasw(); + + // Did we find a match? + __ j(kNotEqual, &not_found_label); + + // Yes, we matched. Compute the index of the result. + __ subl(string_length, counter); + __ leal(out, Address(string_length, -1)); + + Label done; + __ jmp(&done); + + // Failed to match; return -1. + __ Bind(&not_found_label); + __ movl(out, Immediate(-1)); + + // And join up at the end.
+ __ Bind(&done); + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, true); +} + +void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, false); +} + +void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); +} + void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCall, @@ -958,8 +1103,6 @@ void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); locations->SetOut(Location::RegisterLocation(EAX)); - // Needs to be EAX for the invoke. - locations->AddTemp(Location::RegisterLocation(EAX)); } void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -968,8 +1111,7 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) { Register byte_array = locations->InAt(0).AsRegister<Register>(); __ testl(byte_array, byte_array); - SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86( - invoke, locations->GetTemp(0).AsRegister<Register>()); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -1003,8 +1145,6 @@ void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invok InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetOut(Location::RegisterLocation(EAX)); - // Needs to be EAX for the invoke. 
- locations->AddTemp(Location::RegisterLocation(EAX)); } void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) { @@ -1013,8 +1153,7 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) Register string_to_copy = locations->InAt(0).AsRegister<Register>(); __ testl(string_to_copy, string_to_copy); - SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86( - invoke, locations->GetTemp(0).AsRegister<Register>()); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -1584,8 +1723,6 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 1fc5432a89..0efa714a23 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -16,6 +16,8 @@ #include "intrinsics_x86_64.h" +#include <limits> + #include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_generator_x86_64.h" #include "entrypoints/quick/quick_entrypoints.h" @@ -783,7 +785,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { __ Bind(&nan); // output = 0 - __ xorq(out, out); + __ xorl(out, out); __ Bind(&done); } @@ -858,6 +860,157 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void CreateStringIndexOfLocations(HInvoke* invoke, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = new (allocator) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + // The data needs to be in RDI for scasw. So request that the string is there, anyways. + locations->SetInAt(0, Location::RegisterLocation(RDI)); + // If we look for a constant char, we'll still have to copy it into RAX. So just request the + // allocator to do that, anyways. We can still do the constant check by checking the parameter + // of the instruction explicitly. + // Note: This works as we don't clobber RAX anywhere. + locations->SetInAt(1, Location::RegisterLocation(RAX)); + if (!start_at_zero) { + locations->SetInAt(2, Location::RequiresRegister()); // The starting index. + } + // As we clobber RDI during execution anyways, also use it as the output. + locations->SetOut(Location::SameAsFirstInput()); + + // repne scasw uses RCX as the counter. + locations->AddTemp(Location::RegisterLocation(RCX)); + // Need another temporary to be able to compute the result. + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenerateStringIndexOf(HInvoke* invoke, + X86_64Assembler* assembler, + CodeGeneratorX86_64* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. 
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + // Check our assumptions for registers. + DCHECK_EQ(string_obj.AsRegister(), RDI); + DCHECK_EQ(search_value.AsRegister(), RAX); + DCHECK_EQ(counter.AsRegister(), RCX); + DCHECK_EQ(out.AsRegister(), RDI); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. + SlowPathCodeX86_64* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > + std::numeric_limits<uint16_t>::max()) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke); + codegen->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max())); + slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke); + codegen->AddSlowPath(slow_path); + __ j(kAbove, slow_path->GetEntryLabel()); + } + + // From here down, we know that we are looking for a char that fits in 16 bits. + // Location of reference to data array within the String object. + int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + // Location of count within the String object. + int32_t count_offset = mirror::String::CountOffset().Int32Value(); + + // Load string length, i.e., the count field of the string. + __ movl(string_length, Address(string_obj, count_offset)); + + // Do a length check. + // TODO: Support jecxz. + Label not_found_label; + __ testl(string_length, string_length); + __ j(kEqual, &not_found_label); + + if (start_at_zero) { + // Number of chars to scan is the same as the string length. + __ movl(counter, string_length); + + // Move to the start of the string. + __ addq(string_obj, Immediate(value_offset)); + } else { + CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>(); + + // Do a start_index check. + __ cmpl(start_index, string_length); + __ j(kGreaterEqual, &not_found_label); + + // Ensure we have a start index >= 0; + __ xorl(counter, counter); + __ cmpl(start_index, Immediate(0)); + __ cmov(kGreater, counter, start_index, false); // 32-bit copy is enough. + + // Move to the start of the string: string_obj + value_offset + 2 * start_index. + __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + + // Now update ecx, the work counter: it's gonna be string.length - start_index. + __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit. + __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0)); + } + + // Everything is set up for repne scasw: + // * Comparison address in RDI. + // * Counter in ECX. + __ repne_scasw(); + + // Did we find a match? + __ j(kNotEqual, &not_found_label); + + // Yes, we matched. Compute the index of the result.
+ __ subl(string_length, counter); + __ leal(out, Address(string_length, -1)); + + Label done; + __ jmp(&done); + + // Failed to match; return -1. + __ Bind(&not_found_label); + __ movl(out, Immediate(-1)); + + // And join up at the end. + __ Bind(&done); + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, true); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, false); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); +} + void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCall, @@ -1434,8 +1587,6 @@ void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE } UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index bf9b8e59c5..2535ea274a 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -39,8 +39,9 @@ static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) { } } - if (instruction->HasEnvironment()) { - HEnvironment* environment = instruction->GetEnvironment(); + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* input = environment->GetInstructionAt(i); if (input != nullptr) { @@ -63,13 +64,15 @@ static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) { * If `environment` has a loop header phi, we replace it with its first input.
*/ static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info) { - for (size_t i = 0, e = environment->Size(); i < e; ++i) { - HInstruction* input = environment->GetInstructionAt(i); - if (input != nullptr && IsPhiOf(input, info->GetHeader())) { - environment->RemoveAsUserOfInput(i); - HInstruction* incoming = input->InputAt(0); - environment->SetRawEnvAt(i, incoming); - incoming->AddEnvUseAt(environment, i); + for (; environment != nullptr; environment = environment->GetParent()) { + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + HInstruction* input = environment->GetInstructionAt(i); + if (input != nullptr && IsPhiOf(input, info->GetHeader())) { + environment->RemoveAsUserOfInput(i); + HInstruction* incoming = input->InputAt(0); + environment->SetRawEnvAt(i, incoming); + incoming->AddEnvUseAt(environment, i); + } } } } diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 7818c606db..4f259b5095 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -39,7 +39,7 @@ namespace art { static void TestCode(const uint16_t* data, const int* expected_order, size_t number_of_blocks) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 52367730ed..7cb00a1923 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -32,7 +32,7 @@ namespace art { static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 8a96ee9ace..9d7d0b6c67 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -46,7 +46,7 @@ static void DumpBitVector(BitVector* vector, static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); @@ -445,44 +445,40 @@ TEST(LivenessTest, Loop5) { TEST(LivenessTest, Loop6) { // Bitsets are made of: - // (constant0, constant4, constant5, phi in block 2, phi in block 8) + // (constant0, constant4, constant5, phi in block 2) const char* expected = "Block 0\n" - " live in: (00000)\n" - " live out: (11100)\n" - " kill: (11100)\n" + " live in: (0000)\n" + " live out: (1110)\n" + " kill: (1110)\n" "Block 1\n" - " live in: (11100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (1110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 2\n" // loop header - " live in: (01100)\n" - " live out: (01110)\n" - " kill: (00010)\n" + " live in: (0110)\n" + " live out: (0111)\n" + " kill: (0001)\n" "Block 3\n" - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" - "Block 4\n" // original 
back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" - "Block 5\n" // original back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" + "Block 4\n" // back edge + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" + "Block 5\n" // back edge + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 6\n" // return block - " live in: (00010)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0001)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 7\n" // exit block - " live in: (00000)\n" - " live out: (00000)\n" - " kill: (00000)\n" - "Block 8\n" // synthesized back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00001)\n"; + " live in: (0000)\n" + " live out: (0000)\n" + " kill: (0000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index a1ae67009e..42aba04828 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -25,8 +25,6 @@ LocationSummary::LocationSummary(HInstruction* instruction, bool intrinsified) : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0), - environment_(instruction->GetBlock()->GetGraph()->GetArena(), - instruction->EnvironmentSize()), output_overlaps_(Location::kOutputOverlap), call_kind_(call_kind), stack_mask_(nullptr), @@ -37,10 +35,6 @@ LocationSummary::LocationSummary(HInstruction* instruction, for (size_t i = 0; i < instruction->InputCount(); ++i) { inputs_.Put(i, Location()); } - environment_.SetSize(instruction->EnvironmentSize()); - for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) { - environment_.Put(i, Location()); - } instruction->SetLocations(this); if (NeedsSafepoint()) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index c3a99150c4..09bbb33042 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -525,14 +525,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return temps_.Size(); } - void SetEnvironmentAt(uint32_t at, Location location) { - environment_.Put(at, location); - } - - Location GetEnvironmentAt(uint32_t at) const { - return environment_.Get(at); - } - Location Out() const { return output_; } bool CanCall() const { return call_kind_ != kNoCall; } @@ -602,7 +594,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { private: GrowableArray<Location> inputs_; GrowableArray<Location> temps_; - GrowableArray<Location> environment_; // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot // share the same register as the inputs. 
Location::OutputOverlap output_overlaps_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index d3ee770941..41adc7223e 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -17,6 +17,7 @@ #include "nodes.h" #include "ssa_builder.h" +#include "base/bit_vector-inl.h" #include "utils/growable_array.h" #include "scoped_thread_state_change.h" @@ -37,8 +38,9 @@ static void RemoveAsUser(HInstruction* instruction) { instruction->RemoveAsUserOfInput(i); } - HEnvironment* environment = instruction->GetEnvironment(); - if (environment != nullptr) { + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { if (environment->GetInstructionAt(i) != nullptr) { environment->RemoveAsUserOfInput(i); @@ -191,24 +193,6 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { void HGraph::SimplifyLoop(HBasicBlock* header) { HLoopInformation* info = header->GetLoopInformation(); - // If there are more than one back edge, make them branch to the same block that - // will become the only back edge. This simplifies finding natural loops in the - // graph. - // Also, if the loop is a do/while (that is the back edge is an if), change the - // back edge to be a goto. This simplifies code generation of suspend cheks. - if (info->NumberOfBackEdges() > 1 || info->GetBackEdges().Get(0)->GetLastInstruction()->IsIf()) { - HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this, header->GetDexPc()); - AddBlock(new_back_edge); - new_back_edge->AddInstruction(new (arena_) HGoto()); - for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) { - HBasicBlock* back_edge = info->GetBackEdges().Get(pred); - back_edge->ReplaceSuccessor(header, new_back_edge); - } - info->ClearBackEdges(); - info->AddBackEdge(new_back_edge); - new_back_edge->AddSuccessor(header); - } - // Make sure the loop has only one pre header. This simplifies SSA building by having // to just look at the pre header to know which locals are initialized at entry of the // loop. @@ -218,11 +202,9 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { AddBlock(pre_header); pre_header->AddInstruction(new (arena_) HGoto()); - ArenaBitVector back_edges(arena_, GetBlocks().Size(), false); - HBasicBlock* back_edge = info->GetBackEdges().Get(0); for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) { HBasicBlock* predecessor = header->GetPredecessors().Get(pred); - if (predecessor != back_edge) { + if (!info->IsBackEdge(*predecessor)) { predecessor->ReplaceSuccessor(header, pre_header); pred--; } @@ -230,9 +212,17 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { pre_header->AddSuccessor(header); } - // Make sure the second predecessor of a loop header is the back edge. - if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) { - header->SwapPredecessors(); + // Make sure the first predecessor of a loop header is the incoming block. 
+ if (info->IsBackEdge(*header->GetPredecessors().Get(0))) { + HBasicBlock* to_swap = header->GetPredecessors().Get(0); + for (size_t pred = 1, e = header->GetPredecessors().Size(); pred < e; ++pred) { + HBasicBlock* predecessor = header->GetPredecessors().Get(pred); + if (!info->IsBackEdge(*predecessor)) { + header->predecessors_.Put(pred, to_swap); + header->predecessors_.Put(0, predecessor); + break; + } + } } // Place the suspend check at the beginning of the header, so that live registers @@ -357,24 +347,59 @@ void HLoopInformation::PopulateRecursive(HBasicBlock* block) { } bool HLoopInformation::Populate() { - DCHECK_EQ(GetBackEdges().Size(), 1u); - HBasicBlock* back_edge = GetBackEdges().Get(0); - DCHECK(back_edge->GetDominator() != nullptr); - if (!header_->Dominates(back_edge)) { - // This loop is not natural. Do not bother going further. - return false; - } + DCHECK_EQ(blocks_.NumSetBits(), 0u) << "Loop information has already been populated"; + for (size_t i = 0, e = GetBackEdges().Size(); i < e; ++i) { + HBasicBlock* back_edge = GetBackEdges().Get(i); + DCHECK(back_edge->GetDominator() != nullptr); + if (!header_->Dominates(back_edge)) { + // This loop is not natural. Do not bother going further. + return false; + } - // Populate this loop: starting with the back edge, recursively add predecessors - // that are not already part of that loop. Set the header as part of the loop - // to end the recursion. - // This is a recursive implementation of the algorithm described in - // "Advanced Compiler Design & Implementation" (Muchnick) p192. - blocks_.SetBit(header_->GetBlockId()); - PopulateRecursive(back_edge); + // Populate this loop: starting with the back edge, recursively add predecessors + // that are not already part of that loop. Set the header as part of the loop + // to end the recursion. + // This is a recursive implementation of the algorithm described in + // "Advanced Compiler Design & Implementation" (Muchnick) p192. + blocks_.SetBit(header_->GetBlockId()); + PopulateRecursive(back_edge); + } return true; } +void HLoopInformation::Update() { + HGraph* graph = header_->GetGraph(); + for (uint32_t id : blocks_.Indexes()) { + HBasicBlock* block = graph->GetBlocks().Get(id); + // Reset loop information of non-header blocks inside the loop, except + // members of inner nested loops because those should already have been + // updated by their own LoopInformation. + if (block->GetLoopInformation() == this && block != header_) { + block->SetLoopInformation(nullptr); + } + } + blocks_.ClearAllBits(); + + if (back_edges_.IsEmpty()) { + // The loop has been dismantled, delete its suspend check and remove info + // from the header. + DCHECK(HasSuspendCheck()); + header_->RemoveInstruction(suspend_check_); + header_->SetLoopInformation(nullptr); + header_ = nullptr; + suspend_check_ = nullptr; + } else { + if (kIsDebugBuild) { + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + DCHECK(header_->Dominates(back_edges_.Get(i))); + } + } + // This loop still has reachable back edges. Repopulate the list of blocks. 
+ bool populate_successful = Populate(); + DCHECK(populate_successful); + } +} + HBasicBlock* HLoopInformation::GetPreHeader() const { return header_->GetDominator(); } @@ -387,6 +412,14 @@ bool HLoopInformation::IsIn(const HLoopInformation& other) const { return other.blocks_.IsBitSet(header_->GetBlockId()); } +size_t HLoopInformation::GetLifetimeEnd() const { + size_t last_position = 0; + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + last_position = std::max(back_edges_.Get(i)->GetLifetimeEnd(), last_position); + } + return last_position; +} + bool HBasicBlock::Dominates(HBasicBlock* other) const { // Walk up the dominator tree from `other`, to find out if `this` // is an ancestor. @@ -503,6 +536,16 @@ void HBasicBlock::RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_ } } +void HEnvironment::CopyFrom(const GrowableArray<HInstruction*>& locals) { + for (size_t i = 0; i < locals.Size(); i++) { + HInstruction* instruction = locals.Get(i); + SetRawEnvAt(i, instruction); + if (instruction != nullptr) { + instruction->AddEnvUseAt(this, i); + } + } +} + void HEnvironment::CopyFrom(HEnvironment* env) { for (size_t i = 0; i < env->Size(); i++) { HInstruction* instruction = env->GetInstructionAt(i); @@ -963,8 +1006,9 @@ void HBasicBlock::DisconnectAndDelete() { HLoopInformation* loop_info = it.Current(); loop_info->Remove(this); if (loop_info->IsBackEdge(*this)) { - // This deliberately leaves the loop in an inconsistent state and will - // fail SSAChecker unless the entire loop is removed during the pass. + // If this was the last back edge of the loop, we deliberately leave the + // loop in an inconsistent state and will fail SSAChecker unless the + // entire loop is removed during the pass. loop_info->RemoveBackEdge(this); } } @@ -1040,20 +1084,6 @@ void HBasicBlock::DisconnectAndDelete() { SetGraph(nullptr); } -void HBasicBlock::UpdateLoopInformation() { - // Check if loop information points to a dismantled loop. If so, replace with - // the loop information of a larger loop which contains this block, or nullptr - // otherwise. We iterate in case the larger loop has been destroyed too. - while (IsInLoop() && loop_information_->GetBackEdges().IsEmpty()) { - if (IsLoopHeader()) { - HSuspendCheck* suspend_check = loop_information_->GetSuspendCheck(); - DCHECK_EQ(suspend_check->GetBlock(), this); - RemoveInstruction(suspend_check); - } - loop_information_ = loop_information_->GetPreHeader()->GetLoopInformation(); - } -} - void HBasicBlock::MergeWith(HBasicBlock* other) { DCHECK_EQ(GetGraph(), other->GetGraph()); DCHECK(GetDominatedBlocks().Contains(other)); @@ -1075,8 +1105,7 @@ void HBasicBlock::MergeWith(HBasicBlock* other) { HLoopInformation* loop_info = it.Current(); loop_info->Remove(other); if (loop_info->IsBackEdge(*other)) { - loop_info->ClearBackEdges(); - loop_info->AddBackEdge(this); + loop_info->ReplaceBackEdge(other, this); } } @@ -1307,11 +1336,9 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { loop_it.Current()->Add(to); } if (info->IsBackEdge(*at)) { - // Only `at` can become a back edge, as the inlined blocks - // are predecessors of `at`. - DCHECK_EQ(1u, info->NumberOfBackEdges()); - info->ClearBackEdges(); - info->AddBackEdge(to); + // Only `to` can become a back edge, as the inlined blocks + // are predecessors of `to`. 
+ info->ReplaceBackEdge(at, to); } } } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 63f3c95c7d..0089f22169 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -48,6 +48,7 @@ class HPhi; class HSuspendCheck; class LiveInterval; class LocationSummary; +class SlowPathCode; class SsaBuilder; static const int kDefaultNumberOfBlocks = 8; @@ -116,7 +117,11 @@ class HInstructionList { // Control-flow graph of a method. Contains a list of basic blocks. class HGraph : public ArenaObject<kArenaAllocMisc> { public: - HGraph(ArenaAllocator* arena, bool debuggable = false, int start_instruction_id = 0) + HGraph(ArenaAllocator* arena, + const DexFile& dex_file, + uint32_t method_idx, + bool debuggable = false, + int start_instruction_id = 0) : arena_(arena), blocks_(arena, kDefaultNumberOfBlocks), reverse_post_order_(arena, kDefaultNumberOfBlocks), @@ -130,6 +135,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { has_bounds_checks_(false), debuggable_(debuggable), current_instruction_id_(start_instruction_id), + dex_file_(dex_file), + method_idx_(method_idx), cached_null_constant_(nullptr), cached_int_constants_(std::less<int32_t>(), arena->Adapter()), cached_float_constants_(std::less<int32_t>(), arena->Adapter()), @@ -262,6 +269,14 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const; + const DexFile& GetDexFile() const { + return dex_file_; + } + + uint32_t GetMethodIdx() const { + return method_idx_; + } + private: void VisitBlockForDominatorTree(HBasicBlock* block, HBasicBlock* predecessor, @@ -338,6 +353,12 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // The current id to assign to a newly added instruction. See HInstruction.id_. int32_t current_instruction_id_; + // The dex file from which the method is from. + const DexFile& dex_file_; + + // The method index in the dex file. + const uint32_t method_idx_; + // Cached constants. HNullConstant* cached_null_constant_; ArenaSafeMap<int32_t, HIntConstant*> cached_int_constants_; @@ -397,19 +418,30 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> { return back_edges_; } - HBasicBlock* GetSingleBackEdge() const { - DCHECK_EQ(back_edges_.Size(), 1u); - return back_edges_.Get(0); - } + // Returns the lifetime position of the back edge that has the + // greatest lifetime position. + size_t GetLifetimeEnd() const; - void ClearBackEdges() { - back_edges_.Reset(); + void ReplaceBackEdge(HBasicBlock* existing, HBasicBlock* new_back_edge) { + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + if (back_edges_.Get(i) == existing) { + back_edges_.Put(i, new_back_edge); + return; + } + } + UNREACHABLE(); } - // Find blocks that are part of this loop. Returns whether the loop is a natural loop, + // Finds blocks that are part of this loop. Returns whether the loop is a natural loop, // that is the header dominates the back edge. bool Populate(); + // Reanalyzes the loop by removing loop info from its blocks and re-running + // Populate(). If there are no back edges left, the loop info is completely + // removed as well as its SuspendCheck instruction. It must be run on nested + // inner loops first. + void Update(); + // Returns whether this loop information contains `block`. // Note that this loop information *must* be populated before entering this function. 
bool Contains(const HBasicBlock& block) const; @@ -679,14 +711,9 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { loop_information_ = info; } - // Checks if the loop information points to a valid loop. If the loop has been - // dismantled (does not have a back edge any more), loop information is - // removed or replaced with the information of the first valid outer loop. - void UpdateLoopInformation(); - bool IsInLoop() const { return loop_information_ != nullptr; } - // Returns wheter this block dominates the blocked passed as parameter. + // Returns whether this block dominates the blocked passed as parameter. bool Dominates(HBasicBlock* block) const; size_t GetLifetimeStart() const { return lifetime_start_; } @@ -928,6 +955,14 @@ class HUseList : public ValueObject { return first_ != nullptr && first_->next_ == nullptr; } + size_t SizeSlow() const { + size_t count = 0; + for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) { + ++count; + } + return count; + } + private: HUseListNode<T>* first_; }; @@ -1054,15 +1089,43 @@ class SideEffects : public ValueObject { // A HEnvironment object contains the values of virtual registers at a given location. class HEnvironment : public ArenaObject<kArenaAllocMisc> { public: - HEnvironment(ArenaAllocator* arena, size_t number_of_vregs) - : vregs_(arena, number_of_vregs) { + HEnvironment(ArenaAllocator* arena, + size_t number_of_vregs, + const DexFile& dex_file, + uint32_t method_idx, + uint32_t dex_pc) + : vregs_(arena, number_of_vregs), + locations_(arena, number_of_vregs), + parent_(nullptr), + dex_file_(dex_file), + method_idx_(method_idx), + dex_pc_(dex_pc) { vregs_.SetSize(number_of_vregs); for (size_t i = 0; i < number_of_vregs; i++) { vregs_.Put(i, HUserRecord<HEnvironment*>()); } + + locations_.SetSize(number_of_vregs); + for (size_t i = 0; i < number_of_vregs; ++i) { + locations_.Put(i, Location()); + } } - void CopyFrom(HEnvironment* env); + void SetAndCopyParentChain(ArenaAllocator* allocator, HEnvironment* parent) { + parent_ = new (allocator) HEnvironment(allocator, + parent->Size(), + parent->GetDexFile(), + parent->GetMethodIdx(), + parent->GetDexPc()); + if (parent->GetParent() != nullptr) { + parent_->SetAndCopyParentChain(allocator, parent->GetParent()); + } + parent_->CopyFrom(parent); + } + + void CopyFrom(const GrowableArray<HInstruction*>& locals); + void CopyFrom(HEnvironment* environment); + // Copy from `env`. If it's a loop phi for `loop_header`, copy the first // input to the loop phi instead. This is for inserting instructions that // require an environment (like HDeoptimization) in the loop pre-header. @@ -1080,6 +1143,28 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { size_t Size() const { return vregs_.Size(); } + HEnvironment* GetParent() const { return parent_; } + + void SetLocationAt(size_t index, Location location) { + locations_.Put(index, location); + } + + Location GetLocationAt(size_t index) const { + return locations_.Get(index); + } + + uint32_t GetDexPc() const { + return dex_pc_; + } + + uint32_t GetMethodIdx() const { + return method_idx_; + } + + const DexFile& GetDexFile() const { + return dex_file_; + } + private: // Record instructions' use entries of this environment for constant-time removal. // It should only be called by HInstruction when a new environment use is added. 
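Note on the environment change above: each HEnvironment now carries its own dex file, method index and dex pc, plus a parent_ link filled in by SetAndCopyParentChain(), so environments can form a chain (presumably one frame per inlined caller). The following is a minimal standalone sketch of walking such a chain, using invented names (Env, TotalVRegs) rather than the real ART classes:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Simplified stand-in for HEnvironment: the vreg values of one frame plus a
// pointer to the caller's environment (nullptr for the outermost frame).
struct Env {
  std::vector<int> vregs;
  const Env* parent;
  uint32_t dex_pc;
};

// Walk the chain outward, visiting every frame's registers, the way a
// consumer of GetParent() would.
size_t TotalVRegs(const Env* env) {
  size_t total = 0;
  for (; env != nullptr; env = env->parent) {
    total += env->vregs.size();
  }
  return total;
}

int main() {
  Env outer{{1, 2, 3}, nullptr, 10};
  Env inner{{4, 5}, &outer, 42};
  std::cout << TotalVRegs(&inner) << "\n";  // Prints 5: both frames combined.
}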
@@ -1090,6 +1175,11 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { } GrowableArray<HUserRecord<HEnvironment*> > vregs_; + GrowableArray<Location> locations_; + HEnvironment* parent_; + const DexFile& dex_file_; + const uint32_t method_idx_; + const uint32_t dex_pc_; friend class HInstruction; @@ -1221,6 +1311,11 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { } virtual bool NeedsEnvironment() const { return false; } + virtual uint32_t GetDexPc() const { + LOG(FATAL) << "GetDexPc() cannot be called on an instruction that" + " does not need an environment"; + UNREACHABLE(); + } virtual bool IsControlFlow() const { return false; } virtual bool CanThrow() const { return false; } bool HasSideEffects() const { return side_effects_.HasSideEffects(); } @@ -1298,14 +1393,30 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { // copying, the uses lists are being updated. void CopyEnvironmentFrom(HEnvironment* environment) { ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); - environment_ = new (allocator) HEnvironment(allocator, environment->Size()); + environment_ = new (allocator) HEnvironment( + allocator, + environment->Size(), + environment->GetDexFile(), + environment->GetMethodIdx(), + environment->GetDexPc()); environment_->CopyFrom(environment); + if (environment->GetParent() != nullptr) { + environment_->SetAndCopyParentChain(allocator, environment->GetParent()); + } } void CopyEnvironmentFromWithLoopPhiAdjustment(HEnvironment* environment, HBasicBlock* block) { ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); - environment_ = new (allocator) HEnvironment(allocator, environment->Size()); + environment_ = new (allocator) HEnvironment( + allocator, + environment->Size(), + environment->GetDexFile(), + environment->GetMethodIdx(), + environment->GetDexPc()); + if (environment->GetParent() != nullptr) { + environment_->SetAndCopyParentChain(allocator, environment->GetParent()); + } environment_->CopyFromWithLoopPhiAdjustment(environment, block); } @@ -1682,7 +1793,7 @@ class HDeoptimize : public HTemplateInstruction<1> { bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Deoptimize); @@ -2086,15 +2197,16 @@ class HFloatConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } bool IsMinusOne() const OVERRIDE { - return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) == - bit_cast<uint32_t, float>((-1.0f)); + return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f)); } bool IsZero() const OVERRIDE { - return AsFloatConstant()->GetValue() == 0.0f; + return value_ == 0.0f; } bool IsOne() const OVERRIDE { - return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) == - bit_cast<uint32_t, float>(1.0f); + return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f); + } + bool IsNaN() const { + return std::isnan(value_); } DECLARE_INSTRUCTION(FloatConstant); @@ -2124,15 +2236,16 @@ class HDoubleConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } bool IsMinusOne() const OVERRIDE { - return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) == - bit_cast<uint64_t, double>((-1.0)); + return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0)); } bool IsZero() const 
OVERRIDE { - return AsDoubleConstant()->GetValue() == 0.0; + return value_ == 0.0; } bool IsOne() const OVERRIDE { - return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) == - bit_cast<uint64_t, double>(1.0); + return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0); + } + bool IsNaN() const { + return std::isnan(value_); } DECLARE_INSTRUCTION(DoubleConstant); @@ -2251,7 +2364,7 @@ class HInvoke : public HInstruction { Primitive::Type GetType() const OVERRIDE { return return_type_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint32_t GetDexMethodIndex() const { return dex_method_index_; } @@ -2468,7 +2581,7 @@ class HNewInstance : public HExpression<0> { type_index_(type_index), entrypoint_(entrypoint) {} - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } // Calls runtime so needs an environment. @@ -2520,7 +2633,7 @@ class HNewArray : public HExpression<1> { SetRawInputAt(0, length); } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } // Calls runtime so needs an environment. @@ -2615,7 +2728,7 @@ class HDiv : public HBinaryOperation { return (y == -1) ? -x : x / y; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Div); @@ -2642,7 +2755,7 @@ class HRem : public HBinaryOperation { return (y == -1) ? 0 : x % y; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Rem); @@ -2669,7 +2782,7 @@ class HDivZeroCheck : public HExpression<1> { bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(DivZeroCheck); @@ -2864,7 +2977,7 @@ class HTypeConversion : public HExpression<1> { // Required by the x86 and ARM code generators when producing calls // to the runtime. 
- uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; } @@ -2974,7 +3087,7 @@ class HNullCheck : public HExpression<1> { bool CanBeNull() const OVERRIDE { return false; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(NullCheck); @@ -3137,7 +3250,7 @@ class HArraySet : public HTemplateInstruction<3> { bool NeedsTypeCheck() const { return needs_type_check_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } HInstruction* GetArray() const { return InputAt(0); } HInstruction* GetIndex() const { return InputAt(1); } @@ -3207,7 +3320,7 @@ class HBoundsCheck : public HExpression<2> { bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(BoundsCheck); @@ -3247,19 +3360,25 @@ class HTemporary : public HTemplateInstruction<0> { class HSuspendCheck : public HTemplateInstruction<0> { public: explicit HSuspendCheck(uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) {} + : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc), slow_path_(nullptr) {} bool NeedsEnvironment() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } + void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; } + SlowPathCode* GetSlowPath() const { return slow_path_; } DECLARE_INSTRUCTION(SuspendCheck); private: const uint32_t dex_pc_; + // Only used for code generation, in order to share the same slow path between back edges + // of a same loop. + SlowPathCode* slow_path_; + DISALLOW_COPY_AND_ASSIGN(HSuspendCheck); }; @@ -3286,7 +3405,7 @@ class HLoadClass : public HExpression<0> { size_t ComputeHashCode() const OVERRIDE { return type_index_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } bool IsReferrersClass() const { return is_referrers_class_; } @@ -3360,7 +3479,7 @@ class HLoadString : public HExpression<0> { size_t ComputeHashCode() const OVERRIDE { return string_index_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint32_t GetStringIndex() const { return string_index_; } // TODO: Can we deopt or debug when we resolve a string? 
@@ -3398,7 +3517,7 @@ class HClinitCheck : public HExpression<1> { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); } @@ -3498,7 +3617,7 @@ class HThrow : public HTemplateInstruction<1> { bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Throw); @@ -3532,7 +3651,7 @@ class HInstanceOf : public HExpression<2> { return false; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool IsClassFinal() const { return class_is_final_; } @@ -3607,7 +3726,7 @@ class HCheckCast : public HTemplateInstruction<2> { bool MustDoNullCheck() const { return must_do_null_check_; } void ClearMustDoNullCheck() { must_do_null_check_ = false; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool IsClassFinal() const { return class_is_final_; } @@ -3653,7 +3772,7 @@ class HMonitorOperation : public HTemplateInstruction<1> { bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool IsEnter() const { return kind_ == kEnter; } diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 4e83ce576c..2736453ccc 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -16,6 +16,7 @@ #include "base/arena_allocator.h" #include "nodes.h" +#include "optimizing_unit_test.h" #include "gtest/gtest.h" @@ -29,7 +30,7 @@ TEST(Node, RemoveInstruction) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -49,7 +50,8 @@ TEST(Node, RemoveInstruction) { first_block->AddSuccessor(exit_block); exit_block->AddInstruction(new (&allocator) HExit()); - HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1); + HEnvironment* environment = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); null_check->SetRawEnvironment(environment); environment->SetRawEnvAt(0, parameter); parameter->AddEnvUseAt(null_check->GetEnvironment(), 0); @@ -70,7 +72,7 @@ TEST(Node, InsertInstruction) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -96,7 +98,7 @@ TEST(Node, AddInstruction) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -112,4 +114,51 @@ TEST(Node, AddInstruction) { ASSERT_TRUE(parameter->GetUses().HasOnlyOneUse()); } +TEST(Node, ParentEnvironment) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = CreateGraph(&allocator); + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* 
parameter1 = new (&allocator) HParameterValue(0, Primitive::kPrimNot); + HInstruction* with_environment = new (&allocator) HNullCheck(parameter1, 0); + entry->AddInstruction(parameter1); + entry->AddInstruction(with_environment); + entry->AddInstruction(new (&allocator) HExit()); + + ASSERT_TRUE(parameter1->HasUses()); + ASSERT_TRUE(parameter1->GetUses().HasOnlyOneUse()); + + HEnvironment* environment = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); + GrowableArray<HInstruction*> array(&allocator, 1); + array.Add(parameter1); + + environment->CopyFrom(array); + with_environment->SetRawEnvironment(environment); + + ASSERT_TRUE(parameter1->HasEnvironmentUses()); + ASSERT_TRUE(parameter1->GetEnvUses().HasOnlyOneUse()); + + HEnvironment* parent1 = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); + parent1->CopyFrom(array); + + ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 2u); + + HEnvironment* parent2 = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); + parent2->CopyFrom(array); + parent1->SetAndCopyParentChain(&allocator, parent2); + + // One use for parent2, and one other use for the new parent of parent1. + ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 4u); + + // We have copied the parent chain. So we now have two more uses. + environment->SetAndCopyParentChain(&allocator, parent1); + ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 6u); +} + } // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index b2c13adf35..7aea249c42 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -21,6 +21,7 @@ #include "cfi_test.h" #include "gtest/gtest.h" #include "optimizing/code_generator.h" +#include "optimizing/optimizing_unit_test.h" #include "utils/assembler.h" #include "optimizing/optimizing_cfi_test_expected.inc" @@ -45,10 +46,10 @@ class OptimizingCFITest : public CFITest { std::unique_ptr<const InstructionSetFeatures> isa_features; std::string error; isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); - HGraph graph(&allocator); + HGraph* graph = CreateGraph(&allocator); // Generate simple frame with some spills. 
std::unique_ptr<CodeGenerator> code_gen( - CodeGenerator::Create(&graph, isa, *isa_features.get(), opts)); + CodeGenerator::Create(graph, isa, *isa_features.get(), opts)); const int frame_size = 64; int core_reg = 0; int fp_reg = 0; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 05451bcaa6..8bb5d8ebae 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -320,8 +320,10 @@ static void RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { - HDeadCodeElimination dce1(graph, stats); - HDeadCodeElimination dce2(graph, stats, "dead_code_elimination_final"); + HDeadCodeElimination dce1(graph, stats, + HDeadCodeElimination::kInitialDeadCodeEliminationPassName); + HDeadCodeElimination dce2(graph, stats, + HDeadCodeElimination::kFinalDeadCodeEliminationPassName); HConstantFolding fold1(graph); InstructionSimplifier simplify1(graph, stats); HBooleanSimplifier boolean_simplify(graph); @@ -512,7 +514,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite ArenaAllocator arena(Runtime::Current()->GetArenaPool()); HGraph* graph = new (&arena) HGraph( - &arena, compiler_driver->GetCompilerOptions().GetDebuggable()); + &arena, dex_file, method_idx, compiler_driver->GetCompilerOptions().GetDebuggable()); // For testing purposes, we put a special marker on method names that should be compiled // with this compiler. This makes sure we're not regressing. diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 65c84e6942..b6b1bb1cad 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -29,25 +29,26 @@ enum MethodCompilationStat { kCompiledBaseline, kCompiledOptimized, kCompiledQuick, - kInstructionSimplifications, kInlinedInvoke, - kNotCompiledUnsupportedIsa, - kNotCompiledPathological, + kInstructionSimplifications, + kNotCompiledBranchOutsideMethodCode, + kNotCompiledCannotBuildSSA, + kNotCompiledCantAccesType, + kNotCompiledClassNotVerified, kNotCompiledHugeMethod, kNotCompiledLargeMethodNoBranches, - kNotCompiledCannotBuildSSA, kNotCompiledNoCodegen, - kNotCompiledUnresolvedMethod, - kNotCompiledUnresolvedField, kNotCompiledNonSequentialRegPair, + kNotCompiledPathological, kNotCompiledSpaceFilter, - kNotOptimizedTryCatch, - kNotOptimizedDisabled, - kNotCompiledCantAccesType, - kNotOptimizedRegisterAllocator, kNotCompiledUnhandledInstruction, + kNotCompiledUnresolvedField, + kNotCompiledUnresolvedMethod, + kNotCompiledUnsupportedIsa, kNotCompiledVerifyAtRuntime, - kNotCompiledClassNotVerified, + kNotOptimizedDisabled, + kNotOptimizedRegisterAllocator, + kNotOptimizedTryCatch, kRemovedCheckedCast, kRemovedDeadInstruction, kRemovedNullCheck, @@ -98,23 +99,24 @@ class OptimizingCompilerStats { case kCompiledQuick : return "kCompiledQuick"; case kInlinedInvoke : return "kInlinedInvoke"; case kInstructionSimplifications: return "kInstructionSimplifications"; - case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; - case kNotCompiledPathological : return "kNotCompiledPathological"; + case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode"; + case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA"; + case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; + case 
kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod"; case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches"; - case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA"; case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen"; - case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod"; - case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField"; case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair"; - case kNotOptimizedDisabled : return "kNotOptimizedDisabled"; - case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch"; - case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; + case kNotCompiledPathological : return "kNotCompiledPathological"; case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter"; - case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator"; case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction"; + case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField"; + case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod"; + case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime"; - case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; + case kNotOptimizedDisabled : return "kNotOptimizedDisabled"; + case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator"; + case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch"; case kRemovedCheckedCast: return "kRemovedCheckedCast"; case kRemovedDeadInstruction: return "kRemovedDeadInstruction"; case kRemovedNullCheck: return "kRemovedNullCheck"; diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 6b236927da..4f8ec65e43 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -72,11 +72,16 @@ void RemoveSuspendChecks(HGraph* graph) { } } +inline HGraph* CreateGraph(ArenaAllocator* allocator) { + return new (allocator) HGraph( + allocator, *reinterpret_cast<DexFile*>(allocator->Alloc(sizeof(DexFile))), -1); +} + // Create a control-flow graph from Dex instructions. 
inline HGraph* CreateCFG(ArenaAllocator* allocator, const uint16_t* data, Primitive::Type return_type = Primitive::kPrimInt) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph, return_type); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index 293fde978e..c56100dfa1 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -30,7 +30,7 @@ namespace art { static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 2375595978..f53f846326 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1534,9 +1534,10 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) { - DCHECK(current->CoversSlow(env_use->GetPosition()) || (env_use->GetPosition() == range->GetEnd())); - LocationSummary* locations = env_use->GetUser()->GetLocations(); - locations->SetEnvironmentAt(env_use->GetInputIndex(), source); + DCHECK(current->CoversSlow(env_use->GetPosition()) + || (env_use->GetPosition() == range->GetEnd())); + HEnvironment* environment = env_use->GetUser()->GetEnvironment(); + environment->SetLocationAt(env_use->GetInputIndex(), source); env_use = env_use->GetNext(); } diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 8c6d904a4c..b72ffb8bf7 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -38,7 +38,7 @@ namespace art { static bool Check(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); @@ -60,7 +60,7 @@ static bool Check(const uint16_t* data) { TEST(RegisterAllocatorTest, ValidateIntervals) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); @@ -255,7 +255,7 @@ TEST(RegisterAllocatorTest, Loop2) { } static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); @@ -463,7 +463,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, HPhi** phi, HInstruction** input1, HInstruction** input2) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); 
HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -593,7 +593,7 @@ TEST(RegisterAllocatorTest, PhiHint) { static HGraph* BuildFieldReturn(ArenaAllocator* allocator, HInstruction** field, HInstruction** ret) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -661,7 +661,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { static HGraph* BuildTwoSubs(ArenaAllocator* allocator, HInstruction** first_sub, HInstruction** second_sub) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -731,7 +731,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { static HGraph* BuildDiv(ArenaAllocator* allocator, HInstruction** div) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -783,7 +783,7 @@ TEST(RegisterAllocatorTest, SpillInactive) { // Create a synthesized graph to please the register_allocator and // ssa_liveness_analysis code. ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index b66e655d2b..59a2852735 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -332,7 +332,7 @@ void SsaBuilder::BuildSsa() { } HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) { - return GetLocalsFor(block)->GetInstructionAt(local); + return GetLocalsFor(block)->Get(local); } void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { @@ -349,7 +349,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid); block->AddPhi(phi); - current_locals_->SetRawEnvAt(local, phi); + current_locals_->Put(local, phi); } } // Save the loop header so that the last phase of the analysis knows which @@ -389,7 +389,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { block->AddPhi(phi); value = phi; } - current_locals_->SetRawEnvAt(local, value); + current_locals_->Put(local, value); } } @@ -520,7 +520,7 @@ HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) { } void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { - HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber()); + HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber()); // If the operation requests a specific type, we make sure its input is of that type. 
if (load->GetType() != value->GetType()) { if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) { @@ -534,7 +534,7 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { } void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { - current_locals_->SetRawEnvAt(store->GetLocal()->GetRegNumber(), store->InputAt(1)); + current_locals_->Put(store->GetLocal()->GetRegNumber(), store->InputAt(1)); store->GetBlock()->RemoveInstruction(store); } @@ -543,8 +543,12 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) { return; } HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( - GetGraph()->GetArena(), current_locals_->Size()); - environment->CopyFrom(current_locals_); + GetGraph()->GetArena(), + current_locals_->Size(), + GetGraph()->GetDexFile(), + GetGraph()->GetMethodIdx(), + instruction->GetDexPc()); + environment->CopyFrom(*current_locals_); instruction->SetRawEnvironment(environment); } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 265e95b4ac..1c83c4ba48 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -58,14 +58,15 @@ class SsaBuilder : public HGraphVisitor { void BuildSsa(); - HEnvironment* GetLocalsFor(HBasicBlock* block) { - HEnvironment* env = locals_for_.Get(block->GetBlockId()); - if (env == nullptr) { - env = new (GetGraph()->GetArena()) HEnvironment( + GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) { + GrowableArray<HInstruction*>* locals = locals_for_.Get(block->GetBlockId()); + if (locals == nullptr) { + locals = new (GetGraph()->GetArena()) GrowableArray<HInstruction*>( GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs()); - locals_for_.Put(block->GetBlockId(), env); + locals->SetSize(GetGraph()->GetNumberOfVRegs()); + locals_for_.Put(block->GetBlockId(), locals); } - return env; + return locals; } HInstruction* ValueOfLocal(HBasicBlock* block, size_t local); @@ -93,14 +94,14 @@ class SsaBuilder : public HGraphVisitor { static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); // Locals for the current block being visited. - HEnvironment* current_locals_; + GrowableArray<HInstruction*>* current_locals_; // Keep track of loop headers found. The last phase of the analysis iterates // over these blocks to set the inputs of their phis. GrowableArray<HBasicBlock*> loop_headers_; // HEnvironment for each block. - GrowableArray<HEnvironment*> locals_for_; + GrowableArray<GrowableArray<HInstruction*>*> locals_for_; DISALLOW_COPY_AND_ASSIGN(SsaBuilder); }; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 17841685b1..250eb04a1c 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -75,9 +75,7 @@ void SsaLivenessAnalysis::LinearizeGraph() { HBasicBlock* block = it.Current(); size_t number_of_forward_predecessors = block->GetPredecessors().Size(); if (block->IsLoopHeader()) { - // We rely on having simplified the CFG. - DCHECK_EQ(1u, block->GetLoopInformation()->NumberOfBackEdges()); - number_of_forward_predecessors--; + number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges(); } forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors); } @@ -220,10 +218,11 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // Process the environment first, because we know their uses come after // or at the same liveness position of inputs. 
- if (current->HasEnvironment()) { + for (HEnvironment* environment = current->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { // Handle environment uses. See statements (b) and (c) of the // SsaLivenessAnalysis. - HEnvironment* environment = current->GetEnvironment(); for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* instruction = environment->GetInstructionAt(i); bool should_be_live = ShouldBeLiveForEnvironment(instruction); @@ -233,7 +232,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } if (instruction != nullptr) { instruction->GetLiveInterval()->AddUse( - current, i, /* is_environment */ true, should_be_live); + current, environment, i, should_be_live); } } } @@ -245,7 +244,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // to be materialized. if (input->HasSsaIndex()) { live_in->SetBit(input->GetSsaIndex()); - input->GetLiveInterval()->AddUse(current, i, /* is_environment */ false); + input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i); } } } @@ -264,13 +263,12 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } if (block->IsLoopHeader()) { - HBasicBlock* back_edge = block->GetLoopInformation()->GetBackEdges().Get(0); + size_t last_position = block->GetLoopInformation()->GetLifetimeEnd(); // For all live_in instructions at the loop header, we need to create a range // that covers the full loop. for (uint32_t idx : live_in->Indexes()) { HInstruction* current = instructions_from_ssa_index_.Get(idx); - current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), - back_edge->GetLifetimeEnd()); + current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), last_position); } } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 7b98c4eab5..82c5454bb0 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -104,13 +104,13 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> { class UsePosition : public ArenaObject<kArenaAllocMisc> { public: UsePosition(HInstruction* user, + HEnvironment* environment, size_t input_index, - bool is_environment, size_t position, UsePosition* next) : user_(user), + environment_(environment), input_index_(input_index), - is_environment_(is_environment), position_(position), next_(next) { DCHECK((user == nullptr) @@ -129,7 +129,7 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { HInstruction* GetUser() const { return user_; } - bool GetIsEnvironment() const { return is_environment_; } + bool GetIsEnvironment() const { return environment_ != nullptr; } bool IsSynthesized() const { return user_ == nullptr; } size_t GetInputIndex() const { return input_index_; } @@ -144,7 +144,7 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { UsePosition* Dup(ArenaAllocator* allocator) const { return new (allocator) UsePosition( - user_, input_index_, is_environment_, position_, + user_, environment_, input_index_, position_, next_ == nullptr ? 
nullptr : next_->Dup(allocator)); } @@ -159,8 +159,8 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { private: HInstruction* const user_; + HEnvironment* const environment_; const size_t input_index_; - const bool is_environment_; const size_t position_; UsePosition* next_; @@ -237,15 +237,16 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { DCHECK(first_env_use_ == nullptr) << "A temporary cannot have environment user"; size_t position = instruction->GetLifetimePosition(); first_use_ = new (allocator_) UsePosition( - instruction, temp_index, /* is_environment */ false, position, first_use_); + instruction, /* environment */ nullptr, temp_index, position, first_use_); AddRange(position, position + 1); } void AddUse(HInstruction* instruction, + HEnvironment* environment, size_t input_index, - bool is_environment, bool keep_alive = false) { // Set the use within the instruction. + bool is_environment = (environment != nullptr); size_t position = instruction->GetLifetimePosition() + 1; LocationSummary* locations = instruction->GetLocations(); if (!is_environment) { @@ -279,7 +280,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { } DCHECK(first_use_->GetPosition() + 1 == position); UsePosition* new_use = new (allocator_) UsePosition( - instruction, input_index, is_environment, position, cursor->GetNext()); + instruction, environment, input_index, position, cursor->GetNext()); cursor->SetNext(new_use); if (first_range_->GetEnd() == first_use_->GetPosition()) { first_range_->end_ = position; @@ -289,10 +290,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { if (is_environment) { first_env_use_ = new (allocator_) UsePosition( - instruction, input_index, is_environment, position, first_env_use_); + instruction, environment, input_index, position, first_env_use_); } else { first_use_ = new (allocator_) UsePosition( - instruction, input_index, is_environment, position, first_use_); + instruction, environment, input_index, position, first_use_); } if (is_environment && !keep_alive) { @@ -331,7 +332,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { AddBackEdgeUses(*block); } first_use_ = new (allocator_) UsePosition( - instruction, input_index, false, block->GetLifetimeEnd(), first_use_); + instruction, /* environment */ nullptr, input_index, block->GetLifetimeEnd(), first_use_); } void AddRange(size_t start, size_t end) { @@ -973,7 +974,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { break; } - size_t back_edge_use_position = current->GetSingleBackEdge()->GetLifetimeEnd(); + // We're only adding a synthesized use at the last back edge. Adding syntehsized uses on + // all back edges is not necessary: anything used in the loop will have its use at the + // last back edge. If we want branches in a loop to have better register allocation than + // another branch, then it is the linear order we should change. + size_t back_edge_use_position = current->GetLifetimeEnd(); if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) { // There was a use already seen in this loop. Therefore the previous call to `AddUse` // already inserted the backedge use. We can stop going outward. 
@@ -985,8 +990,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { || back_edge_use_position > last_in_new_list->GetPosition()); UsePosition* new_use = new (allocator_) UsePosition( - nullptr, UsePosition::kNoInput, /* is_environment */ false, - back_edge_use_position, nullptr); + /* user */ nullptr, + /* environment */ nullptr, + UsePosition::kNoInput, + back_edge_use_position, + /* next */ nullptr); if (last_in_new_list != nullptr) { // Going outward. The latest created use needs to point to the new use. diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 00c241b85a..fb3e7d798c 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -78,7 +78,7 @@ static void ReNumberInstructions(HGraph* graph) { static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); @@ -373,30 +373,26 @@ TEST(SsaTest, Loop6) { const char* expected = "BasicBlock 0, succ: 1\n" " 0: IntConstant 0 [5]\n" - " 1: IntConstant 4 [14, 8, 8]\n" - " 2: IntConstant 5 [14]\n" + " 1: IntConstant 4 [5, 8, 8]\n" + " 2: IntConstant 5 [5]\n" " 3: Goto\n" "BasicBlock 1, pred: 0, succ: 2\n" " 4: Goto\n" - "BasicBlock 2, pred: 1, 8, succ: 6, 3\n" - " 5: Phi(0, 14) [12, 6, 6]\n" + "BasicBlock 2, pred: 1, 4, 5, succ: 6, 3\n" + " 5: Phi(0, 2, 1) [12, 6, 6]\n" " 6: Equal(5, 5) [7]\n" " 7: If(6)\n" "BasicBlock 3, pred: 2, succ: 5, 4\n" " 8: Equal(1, 1) [9]\n" " 9: If(8)\n" - "BasicBlock 4, pred: 3, succ: 8\n" + "BasicBlock 4, pred: 3, succ: 2\n" " 10: Goto\n" - "BasicBlock 5, pred: 3, succ: 8\n" + "BasicBlock 5, pred: 3, succ: 2\n" " 11: Goto\n" "BasicBlock 6, pred: 2, succ: 7\n" " 12: Return(5)\n" "BasicBlock 7, pred: 6\n" - " 13: Exit\n" - // Synthesized single back edge of loop. 
- "BasicBlock 8, pred: 5, 4, succ: 2\n" - " 14: Phi(1, 2) [5]\n" - " 15: Goto\n"; + " 13: Exit\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc index a5a0eb2114..5ca66a1de6 100644 --- a/compiler/optimizing/suspend_check_test.cc +++ b/compiler/optimizing/suspend_check_test.cc @@ -30,7 +30,7 @@ namespace art { static void TestCode(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 282ab96ce4..5e9653df33 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -272,6 +272,10 @@ void Mips64Assembler::Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0x25, rs, rt, imm16); } +void Mips64Assembler::Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x27, rs, rt, imm16); +} + void Mips64Assembler::Lui(GpuRegister rt, uint16_t imm16) { EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16); } @@ -480,6 +484,9 @@ void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuR case kLoadWord: Lw(reg, base, offset); break; + case kLoadUnsignedWord: + Lwu(reg, base, offset); + break; case kLoadDoubleword: // TODO: alignment issues ??? Ld(reg, base, offset); @@ -512,7 +519,6 @@ void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, CHECK_EQ(0u, size) << dst; } else if (dst.IsGpuRegister()) { if (size == 4) { - CHECK_EQ(4u, size) << dst; LoadFromOffset(kLoadWord, dst.AsGpuRegister(), src_register, src_offset); } else if (size == 8) { CHECK_EQ(8u, size) << dst; @@ -740,14 +746,13 @@ void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> sr void Mips64Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) { Mips64ManagedRegister dest = mdest.AsMips64(); CHECK(dest.IsGpuRegister()); - LoadFromOffset(kLoadWord, dest.AsGpuRegister(), SP, src.Int32Value()); + LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(), SP, src.Int32Value()); } -void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, - MemberOffset offs) { +void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs) { Mips64ManagedRegister dest = mdest.AsMips64(); - CHECK(dest.IsGpuRegister() && dest.IsGpuRegister()); - LoadFromOffset(kLoadWord, dest.AsGpuRegister(), + CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister()); + LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(), base.AsMips64().AsGpuRegister(), offs.Int32Value()); if (kPoisonHeapReferences) { Subu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister()); @@ -921,7 +926,7 @@ void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, // the address in the handle scope holding the reference. // e.g. out_reg = (handle == 0) ? 
0 : (SP+handle_offset) if (in_reg.IsNoRegister()) { - LoadFromOffset(kLoadWord, out_reg.AsGpuRegister(), + LoadFromOffset(kLoadUnsignedWord, out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); in_reg = out_reg; } @@ -944,7 +949,7 @@ void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off, CHECK(scratch.IsGpuRegister()) << scratch; if (null_allowed) { Label null_arg; - LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), SP, + LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is // the address in the handle scope holding the reference. @@ -998,7 +1003,7 @@ void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscr Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; // Call *(*(SP + base) + offset) - LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), + LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP, base.Int32Value()); LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), scratch.AsGpuRegister(), offset.Int32Value()); diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index b7f6a9e83a..2d7c661eac 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -36,6 +36,7 @@ enum LoadOperandType { kLoadSignedHalfword, kLoadUnsignedHalfword, kLoadWord, + kLoadUnsignedWord, kLoadDoubleword }; @@ -85,6 +86,7 @@ class Mips64Assembler FINAL : public Assembler { void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16); + void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lui(GpuRegister rt, uint16_t imm16); void Mfhi(GpuRegister rd); void Mflo(GpuRegister rd); diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index f2541a2113..7e7520066d 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1507,6 +1507,14 @@ void X86Assembler::jmp(Label* label) { } +void X86Assembler::repne_scasw() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0xF2); + EmitUint8(0xAF); +} + + X86Assembler* X86Assembler::lock() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF0); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 946c96de71..136b0cbfdb 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -464,6 +464,8 @@ class X86Assembler FINAL : public Assembler { void jmp(const Address& address); void jmp(Label* label); + void repne_scasw(); + X86Assembler* lock(); void cmpxchgl(const Address& address, Register reg); void cmpxchg8b(const Address& address); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index f326e496d4..aacc57bb0c 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -190,4 +190,10 @@ TEST_F(AssemblerX86Test, FPUIntegerStore) { DriverStr(expected, "FPUIntegerStore"); } +TEST_F(AssemblerX86Test, Repnescasw) { + GetAssembler()->repne_scasw(); + const char* expected = "repne scasw\n"; + DriverStr(expected, "Repnescasw"); +} + } // namespace art diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 
c0ca7ef437..feceecac68 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -2065,6 +2065,14 @@ void X86_64Assembler::bswapq(CpuRegister dst) { } +void X86_64Assembler::repne_scasw() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0xF2); + EmitUint8(0xAF); +} + + void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) { // TODO: Need to have a code constants table. int64_t constant = bit_cast<int64_t, double>(value); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index f5327a8d02..162714af68 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -601,6 +601,8 @@ class X86_64Assembler FINAL : public Assembler { void bswapl(CpuRegister dst); void bswapq(CpuRegister dst); + void repne_scasw(); + // // Macros for High-level operations. // diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 9e4144ac26..0be4d632fb 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1215,4 +1215,10 @@ TEST_F(AssemblerX86_64Test, MovsxbRegs) { DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb"); } +TEST_F(AssemblerX86_64Test, Repnescasw) { + GetAssembler()->repne_scasw(); + const char* expected = "repne scasw\n"; + DriverStr(expected, "Repnescasw"); +} + } // namespace art |
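The new repne_scasw() emitters in the x86 and x86_64 assemblers write three bytes: 0x66 (operand-size prefix, selecting the 16-bit scasw form of SCAS), 0xF2 (the REPNE prefix), and 0xAF (the SCAS opcode), which is what the added assembler tests disassemble as "repne scasw". A standalone sketch of that encoding, separate from the real Assembler classes:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Emits the same byte sequence as the repne_scasw() additions in the diff
// above: operand-size prefix, REPNE prefix, then the SCAS opcode.
std::vector<uint8_t> EncodeRepneScasw() {
  return {0x66, 0xF2, 0xAF};
}

int main() {
  std::vector<uint8_t> code = EncodeRepneScasw();
  assert(code.size() == 3 && code[0] == 0x66 && code[1] == 0xF2 && code[2] == 0xAF);
  for (uint8_t b : code) {
    std::printf("%02x ", static_cast<unsigned>(b));  // Prints: 66 f2 af
  }
  std::printf("\n");
}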