Diffstat (limited to 'compiler/optimizing')
24 files changed, 603 insertions, 190 deletions
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index f985745e7a..f0cafc847f 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -61,40 +61,6 @@ static bool NegatesCondition(HInstruction* input_true, HInstruction* input_false && input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne(); } -// Returns an instruction with the opposite boolean value from 'cond'. -static HInstruction* GetOppositeCondition(HInstruction* cond) { - HGraph* graph = cond->GetBlock()->GetGraph(); - ArenaAllocator* allocator = graph->GetArena(); - - if (cond->IsCondition()) { - HInstruction* lhs = cond->InputAt(0); - HInstruction* rhs = cond->InputAt(1); - switch (cond->AsCondition()->GetOppositeCondition()) { // get *opposite* - case kCondEQ: return new (allocator) HEqual(lhs, rhs); - case kCondNE: return new (allocator) HNotEqual(lhs, rhs); - case kCondLT: return new (allocator) HLessThan(lhs, rhs); - case kCondLE: return new (allocator) HLessThanOrEqual(lhs, rhs); - case kCondGT: return new (allocator) HGreaterThan(lhs, rhs); - case kCondGE: return new (allocator) HGreaterThanOrEqual(lhs, rhs); - case kCondB: return new (allocator) HBelow(lhs, rhs); - case kCondBE: return new (allocator) HBelowOrEqual(lhs, rhs); - case kCondA: return new (allocator) HAbove(lhs, rhs); - case kCondAE: return new (allocator) HAboveOrEqual(lhs, rhs); - } - } else if (cond->IsIntConstant()) { - HIntConstant* int_const = cond->AsIntConstant(); - if (int_const->IsZero()) { - return graph->GetIntConstant(1); - } else { - DCHECK(int_const->IsOne()); - return graph->GetIntConstant(0); - } - } - // General case when 'cond' is another instruction of type boolean, - // as verified by SSAChecker. - return new (allocator) HBooleanNot(cond); -} - void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { DCHECK(block->EndsWithIf()); @@ -126,10 +92,7 @@ void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { HInstruction* replacement; if (NegatesCondition(true_value, false_value)) { - replacement = GetOppositeCondition(if_condition); - if (replacement->GetBlock() == nullptr) { - block->InsertInstructionBefore(replacement, if_instruction); - } + replacement = graph_->InsertOppositeCondition(if_condition, if_instruction); } else if (PreservesCondition(true_value, false_value)) { replacement = if_condition; } else { diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 3257de1858..32968a597b 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -876,12 +876,78 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, clinit_check); } +bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) { + bool finalizable; + bool can_throw = NeedsAccessCheck(type_index, &finalizable); + + // Only the non-resolved entrypoint handles the finalizable class case. If we + // need access checks, then we haven't resolved the method and the class may + // again be finalizable. + QuickEntrypointEnum entrypoint = (finalizable || can_throw) + ? 
kQuickAllocObject + : kQuickAllocObjectInitialized; + + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<3> hs(soa.Self()); + Handle<mirror::DexCache> dex_cache(hs.NewHandle( + dex_compilation_unit_->GetClassLinker()->FindDexCache( + soa.Self(), *dex_compilation_unit_->GetDexFile()))); + Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index))); + const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); + Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( + outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file))); + + if (outer_dex_cache.Get() != dex_cache.Get()) { + // We currently do not support inlining allocations across dex files. + return false; + } + + HLoadClass* load_class = new (arena_) HLoadClass( + graph_->GetCurrentMethod(), + type_index, + *dex_compilation_unit_->GetDexFile(), + IsOutermostCompilingClass(type_index), + dex_pc, + /*needs_access_check*/ can_throw); + + current_block_->AddInstruction(load_class); + HInstruction* cls = load_class; + if (!IsInitialized(resolved_class, type_index)) { + cls = new (arena_) HClinitCheck(load_class, dex_pc); + current_block_->AddInstruction(cls); + } + + current_block_->AddInstruction(new (arena_) HNewInstance( + cls, + graph_->GetCurrentMethod(), + dex_pc, + type_index, + *dex_compilation_unit_->GetDexFile(), + can_throw, + finalizable, + entrypoint)); + return true; +} + +bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls, uint16_t type_index) const { + if (cls.Get() == nullptr) { + return false; + } + if (GetOutermostCompilingClass() == cls.Get()) { + return true; + } + // TODO: find out why this check is needed. + bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( + *outer_compilation_unit_->GetDexFile(), type_index); + return cls->IsInitialized() && is_in_dex_cache; +} + HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( uint32_t dex_pc, uint32_t method_idx, HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<4> hs(soa.Self()); + StackHandleScope<5> hs(soa.Self()); Handle<mirror::DexCache> dex_cache(hs.NewHandle( dex_compilation_unit_->GetClassLinker()->FindDexCache( soa.Self(), *dex_compilation_unit_->GetDexFile()))); @@ -927,13 +993,8 @@ HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( // whether we should add an explicit class initialization // check for its declaring class before the static method call. - // TODO: find out why this check is needed. 
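The entrypoint choice made in BuildNewInstance above reduces to a two-input decision; a minimal sketch (the helper name is ours, only the enum values come from the patch):

  // kQuickAllocObject is the only entrypoint that handles finalizable
  // classes, and a class that still needs access checks is unresolved and
  // may turn out to be finalizable, so both cases take that entrypoint.
  QuickEntrypointEnum SelectAllocEntrypoint(bool finalizable, bool can_throw) {
    return (finalizable || can_throw) ? kQuickAllocObject
                                      : kQuickAllocObjectInitialized;
  }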
- bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( - *outer_compilation_unit_->GetDexFile(), storage_index); - bool is_initialized = - resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache; - - if (is_initialized) { + Handle<mirror::Class> cls(hs.NewHandle(resolved_method->GetDeclaringClass())); + if (IsInitialized(cls, storage_index)) { *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; } else { *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; @@ -1272,7 +1333,7 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, uint16_t field_index = instruction.VRegB_21c(); ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<4> hs(soa.Self()); + StackHandleScope<5> hs(soa.Self()); Handle<mirror::DexCache> dex_cache(hs.NewHandle( dex_compilation_unit_->GetClassLinker()->FindDexCache( soa.Self(), *dex_compilation_unit_->GetDexFile()))); @@ -1318,11 +1379,6 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, } } - // TODO: find out why this check is needed. - bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( - *outer_compilation_unit_->GetDexFile(), storage_index); - bool is_initialized = resolved_field->GetDeclaringClass()->IsInitialized() && is_in_dex_cache; - HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(), storage_index, *dex_compilation_unit_->GetDexFile(), @@ -1332,12 +1388,14 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, current_block_->AddInstruction(constant); HInstruction* cls = constant; - if (!is_initialized && !is_outer_class) { + + Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass())); + if (!IsInitialized(klass, storage_index)) { cls = new (arena_) HClinitCheck(constant, dex_pc); current_block_->AddInstruction(cls); } - uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex(); + uint16_t class_def_index = klass->GetDexClassDefIndex(); if (is_put) { // We need to keep the class alive before loading the value. Temporaries temps(graph_); @@ -2509,20 +2567,9 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 current_block_->AddInstruction(fake_string); UpdateLocal(register_index, fake_string, dex_pc); } else { - bool finalizable; - bool can_throw = NeedsAccessCheck(type_index, &finalizable); - QuickEntrypointEnum entrypoint = can_throw - ? kQuickAllocObjectWithAccessCheck - : kQuickAllocObject; - - current_block_->AddInstruction(new (arena_) HNewInstance( - graph_->GetCurrentMethod(), - dex_pc, - type_index, - *dex_compilation_unit_->GetDexFile(), - can_throw, - finalizable, - entrypoint)); + if (!BuildNewInstance(type_index, dex_pc)) { + return false; + } UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc); } break; diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index f857ef0e12..615b0cd738 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -308,6 +308,14 @@ class HGraphBuilder : public ValueObject { uint32_t method_idx, HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement); + // Build a HNewInstance instruction. + bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc); + + // Return whether the compiler can assume `cls` is initialized. `type_index` is the index + // of the class in the outer dex file. 
+ bool IsInitialized(Handle<mirror::Class> cls, uint16_t type_index) const + SHARED_REQUIRES(Locks::mutator_lock_); + ArenaAllocator* const arena_; // A list of the size of the dex code holding block information for diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index cb6bed08ec..461319eae7 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -3361,7 +3361,19 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { __ mov(o_l, ShifterOperand(high)); __ LoadImmediate(o_h, 0); } - } else { // shift_value < 32 + } else if (shift_value == 1) { + if (op->IsShl()) { + __ Lsls(o_l, low, 1); + __ adc(o_h, high, ShifterOperand(high)); + } else if (op->IsShr()) { + __ Asrs(o_h, high, 1); + __ Rrx(o_l, low); + } else { + __ Lsrs(o_h, high, 1); + __ Rrx(o_l, low); + } + } else { + DCHECK(2 <= shift_value && shift_value < 32) << shift_value; if (op->IsShl()) { __ Lsl(o_h, high, shift_value); __ orr(o_h, o_h, ShifterOperand(low, LSR, 32 - shift_value)); @@ -3413,14 +3425,12 @@ void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(R0)); } void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), @@ -4320,7 +4330,7 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { if (needs_write_barrier) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for read barrier too. 
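The new shift_value == 1 path above routes the bit that crosses the 32-bit boundary through the carry flag: Lsls/Asrs/Lsrs set it, adc/Rrx consume it. A portable model of the same arithmetic, as a standalone sketch rather than ART code:

  #include <cstdint>

  // SHL by 1: Lsls doubles `low` and sets C to its old bit 31; adc then
  // computes high + high + C, folding the crossing bit into the high word.
  uint64_t ShlPairBy1(uint32_t high, uint32_t low) {
    uint32_t carry = low >> 31;
    return (static_cast<uint64_t>(high + high + carry) << 32) | (low << 1);
  }

  // SHR (arithmetic) by 1: Asrs shifts `high` and sets C to its old bit 0;
  // Rrx rotates `low` right through the carry, landing that bit in bit 31.
  uint64_t ShrPairBy1(uint32_t high, uint32_t low) {
    uint32_t carry = high & 1u;
    uint32_t new_high = static_cast<uint32_t>(static_cast<int32_t>(high) >> 1);
    return (static_cast<uint64_t>(new_high) << 32) | ((low >> 1) | (carry << 31));
  }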
+ locations->AddTemp(Location::RequiresRegister()); } } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 2776b7d6c9..d82cb672a0 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1628,6 +1628,47 @@ void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress( Operand(InputOperandAt(instruction, 1))); } +void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); + locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex, + Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { + Register res = OutputRegister(instr); + Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex); + Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex); + Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex); + + // Avoid emitting code that could trigger Cortex A53's erratum 835769. + // This fixup should be carried out for all multiply-accumulate instructions: + // madd, msub, smaddl, smsubl, umaddl and umsubl. + if (instr->GetType() == Primitive::kPrimLong && + codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) { + MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler(); + vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize; + if (prev->IsLoadOrStore()) { + // Make sure we emit only exactly one nop. 
+ vixl::CodeBufferCheckScope scope(masm, + vixl::kInstructionSize, + vixl::CodeBufferCheckScope::kCheck, + vixl::CodeBufferCheckScope::kExactSize); + __ nop(); + } + } + + if (instr->GetOpKind() == HInstruction::kAdd) { + __ Madd(res, mul_left, mul_right, accumulator); + } else { + DCHECK(instr->GetOpKind() == HInstruction::kSub); + __ Msub(res, mul_left, mul_right, accumulator); + } +} + void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -3372,17 +3413,13 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt); - DCHECK(type_index.Is(w0)); - __ Mov(type_index, instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 801e203de5..f3178bd77c 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -3478,17 +3478,12 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - Register current_method_register = calling_convention.GetRegisterAt(1); - __ Lw(current_method_register, SP, kCurrentMethodStackOffset); - // Move an uint16_t value to a register. 
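For reference, the two arm64 instructions selected above have the following semantics (the nop is only needed because erratum 835769 concerns a 64-bit multiply-accumulate directly following a load or store). A sketch of the semantics, not ART code:

  #include <cstdint>

  // Madd(res, a, b, acc)  =>  res = acc + a * b
  int64_t MaddModel(int64_t acc, int64_t a, int64_t b) { return acc + a * b; }
  // Msub(res, a, b, acc)  =>  res = acc - a * b
  int64_t MsubModel(int64_t acc, int64_t a, int64_t b) { return acc - a * b; }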
- __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); codegen_->InvokeRuntime( GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 8530fe7a36..802c435279 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -3270,15 +3270,12 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = instruction->GetLocations(); - // Move an uint16_t value to a register. - __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex()); codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index a87e8ede04..6a9177de26 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -3769,13 +3769,11 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); locations->SetOut(Location::RegisterLocation(EAX)); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), @@ -4856,7 +4854,7 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); // Possibly used for read barrier too. 
+ locations->AddTemp(Location::RegisterLocation(ECX)); } } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index dcc180804d..8cfd8cb985 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -3765,18 +3765,14 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(RAX)); } void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), - instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), @@ -4500,8 +4496,6 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { // This first temporary register is possibly used for heap // reference poisoning and/or read barrier emission too. locations->AddTemp(Location::RequiresRegister()); - // This second temporary register is possibly used for read - // barrier emission too. locations->AddTemp(Location::RequiresRegister()); } } diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index d166d0061f..4438190ec3 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -422,6 +422,12 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit"); } +#ifdef ART_ENABLE_CODEGEN_arm64 + void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetOpKind(); + } +#endif + bool IsPass(const char* name) { return strcmp(pass_name_, name) == 0; } diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index c36de84064..4af111b784 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -377,9 +377,10 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { HInstruction* current = block->GetFirstInstruction(); while (current != nullptr) { - set->Kill(current->GetSideEffects()); // Save the next instruction in case `current` is removed from the graph. HInstruction* next = current->GetNext(); + // Do not kill the set with the side effects of the instruction just now: if + // the instruction is GVN'ed, we don't need to kill. 
if (current->CanBeMoved()) { if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) { // For commutative ops, (x op y) will be treated the same as (y op x) @@ -395,8 +396,11 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { current->ReplaceWith(existing); current->GetBlock()->RemoveInstruction(current); } else { + set->Kill(current->GetSideEffects()); set->Add(current); } + } else { + set->Kill(current->GetSideEffects()); } current = next; } diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 9ad2dd1c8e..2f3df7fc68 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -169,16 +169,6 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { // src instruction->ReplaceWith(input_other); instruction->GetBlock()->RemoveInstruction(instruction); - } else if (instruction->IsShl() && input_cst->IsOne()) { - // Replace Shl looking like - // SHL dst, src, 1 - // with - // ADD dst, src, src - HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(), - input_other, - input_other); - instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add); - RecordSimplification(); } } } @@ -372,9 +362,8 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { block->RemoveInstruction(equal); RecordSimplification(); } else if (input_const->AsIntConstant()->IsZero()) { - // Replace (bool_value == false) with !bool_value - block->ReplaceAndRemoveInstructionWith( - equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); + equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal)); + block->RemoveInstruction(equal); RecordSimplification(); } else { // Replace (bool_value == integer_not_zero_nor_one_constant) with false @@ -399,9 +388,8 @@ void InstructionSimplifierVisitor::VisitNotEqual(HNotEqual* not_equal) { // We are comparing the boolean to a constant which is of type int and can // be any constant. if (input_const->AsIntConstant()->IsOne()) { - // Replace (bool_value != true) with !bool_value - block->ReplaceAndRemoveInstructionWith( - not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); + not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal)); + block->RemoveInstruction(not_equal); RecordSimplification(); } else if (input_const->AsIntConstant()->IsZero()) { // Replace (bool_value != false) with bool_value diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index eb79f469eb..54dd2ccaf8 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -62,6 +62,67 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio RecordSimplification(); } +bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns( + HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) { + DCHECK(Primitive::IsIntOrLongType(mul->GetType())); + DCHECK(input_binop->IsAdd() || input_binop->IsSub()); + DCHECK_NE(input_binop, input_other); + if (!input_binop->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + // Try to interpret patterns like + // a * (b <+/-> 1) + // as + // (a * b) <+/-> a + HInstruction* input_a = input_other; + HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize. 
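The patterns matched below rest on simple algebraic identities; written out as plain arithmetic (a sketch; unsigned types are used so the wrap-around matches the IR's two's-complement behaviour):

  #include <cstdint>

  uint64_t MulBPlusOne(uint64_t a, uint64_t b)  { return a * b + a; }  // a * (b + 1)
  uint64_t MulOneMinusB(uint64_t a, uint64_t b) { return a - a * b; }  // a * (1 - b)
  // a * (b - (-1)) is the same as a * (b + 1) and also becomes a * b + a.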
+  HInstruction::InstructionKind op_kind;
+
+  if (input_binop->IsAdd()) {
+    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
+      // Interpret
+      //    a * (b + 1)
+      // as
+      //    (a * b) + a
+      input_b = input_binop->GetLeastConstantLeft();
+      op_kind = HInstruction::kAdd;
+    }
+  } else {
+    DCHECK(input_binop->IsSub());
+    if (input_binop->GetRight()->IsConstant() &&
+        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
+      // Interpret
+      //    a * (b - (-1))
+      // as
+      //    a + (a * b)
+      input_b = input_binop->GetLeft();
+      op_kind = HInstruction::kAdd;
+    } else if (input_binop->GetLeft()->IsConstant() &&
+               input_binop->GetLeft()->AsConstant()->IsOne()) {
+      // Interpret
+      //    a * (1 - b)
+      // as
+      //    a - (a * b)
+      input_b = input_binop->GetRight();
+      op_kind = HInstruction::kSub;
+    }
+  }
+
+  if (input_b == nullptr) {
+    // We did not find a pattern we can optimize.
+    return false;
+  }
+
+  HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate(
+      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());
+
+  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
+  input_binop->GetBlock()->RemoveInstruction(input_binop);
+
+  return true;
+}
+
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
   TryExtractArrayAccessAddress(instruction,
                                instruction->GetArray(),
@@ -76,5 +137,78 @@ void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) {
                                Primitive::ComponentSize(instruction->GetComponentType()));
 }
 
+void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
+  Primitive::Type type = instruction->GetType();
+  if (!Primitive::IsIntOrLongType(type)) {
+    return;
+  }
+
+  HInstruction* use = instruction->HasNonEnvironmentUses()
+      ? instruction->GetUses().GetFirst()->GetUser()
+      : nullptr;
+
+  if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) {
+    // Replace code looking like
+    //    MUL tmp, x, y
+    //    SUB dst, acc, tmp
+    // with
+    //    MULSUB dst, acc, x, y
+    // Note that we do not want to (unconditionally) perform the merge when the
+    // multiplication has multiple uses and it can be merged in all of them.
+    // Multiple uses could happen on the same control-flow path, and we would
+    // then increase the amount of work. In the future we could try to evaluate
+    // whether all uses are on different control-flow paths (using dominance and
+    // reverse-dominance information) and only perform the merge when they are.
+    HInstruction* accumulator = nullptr;
+    HBinaryOperation* binop = use->AsBinaryOperation();
+    HInstruction* binop_left = binop->GetLeft();
+    HInstruction* binop_right = binop->GetRight();
+    // Be careful after GVN. This should not happen since the `HMul` has only
+    // one use.
+    DCHECK_NE(binop_left, binop_right);
+    if (binop_right == instruction) {
+      accumulator = binop_left;
+    } else if (use->IsAdd()) {
+      DCHECK_EQ(binop_left, instruction);
+      accumulator = binop_right;
+    }
+
+    if (accumulator != nullptr) {
+      HArm64MultiplyAccumulate* mulacc =
+          new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type,
+                                                                binop->GetKind(),
+                                                                accumulator,
+                                                                instruction->GetLeft(),
+                                                                instruction->GetRight());
+
+      binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+      DCHECK(!instruction->HasUses());
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
+      return;
+    }
+  }
+
+  // Use multiply accumulate instruction for a few simple patterns.
+ // We prefer not applying the following transformations if the left and + // right inputs perform the same operation. + // We rely on GVN having squashed the inputs if appropriate. However the + // results are still correct even if that did not happen. + if (instruction->GetLeft() == instruction->GetRight()) { + return; + } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + if ((right->IsAdd() || right->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) { + return; + } + if ((left->IsAdd() || left->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) { + return; + } +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 4b697dba0e..eed2276588 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -40,8 +40,14 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { HInstruction* index, int access_size); + bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, + HBinaryOperation* input_binop, + HInstruction* input_other); + + // HInstruction visitors, sorted alphabetically. void VisitArrayGet(HArrayGet* instruction) OVERRIDE; void VisitArraySet(HArraySet* instruction) OVERRIDE; + void VisitMul(HMul* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 0a39ff31bf..890598d687 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2090,4 +2090,46 @@ void HInstruction::RemoveEnvironmentUsers() { env_uses_.Clear(); } +// Returns an instruction with the opposite boolean value from 'cond'. +HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* cursor) { + ArenaAllocator* allocator = GetArena(); + + if (cond->IsCondition() && + !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType())) { + // Can't reverse floating point conditions. We have to use HBooleanNot in that case. 
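The floating point exclusion above matters because of NaN: a comparison and its "opposite" can both be false, so swapping the condition is not a logical negation. A self-contained check, illustrative only:

  #include <cassert>
  #include <cmath>

  int main() {
    double nan = std::nan("");
    assert(!(nan < 1.0));   // HLessThan(nan, 1.0) is false...
    assert(!(nan >= 1.0));  // ...and so is HGreaterThanOrEqual(nan, 1.0),
                            // hence only HBooleanNot yields the opposite value.
    return 0;
  }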
+ HInstruction* lhs = cond->InputAt(0); + HInstruction* rhs = cond->InputAt(1); + HInstruction* replacement = nullptr; + switch (cond->AsCondition()->GetOppositeCondition()) { // get *opposite* + case kCondEQ: replacement = new (allocator) HEqual(lhs, rhs); break; + case kCondNE: replacement = new (allocator) HNotEqual(lhs, rhs); break; + case kCondLT: replacement = new (allocator) HLessThan(lhs, rhs); break; + case kCondLE: replacement = new (allocator) HLessThanOrEqual(lhs, rhs); break; + case kCondGT: replacement = new (allocator) HGreaterThan(lhs, rhs); break; + case kCondGE: replacement = new (allocator) HGreaterThanOrEqual(lhs, rhs); break; + case kCondB: replacement = new (allocator) HBelow(lhs, rhs); break; + case kCondBE: replacement = new (allocator) HBelowOrEqual(lhs, rhs); break; + case kCondA: replacement = new (allocator) HAbove(lhs, rhs); break; + case kCondAE: replacement = new (allocator) HAboveOrEqual(lhs, rhs); break; + default: + LOG(FATAL) << "Unexpected condition"; + UNREACHABLE(); + } + cursor->GetBlock()->InsertInstructionBefore(replacement, cursor); + return replacement; + } else if (cond->IsIntConstant()) { + HIntConstant* int_const = cond->AsIntConstant(); + if (int_const->IsZero()) { + return GetIntConstant(1); + } else { + DCHECK(int_const->IsOne()); + return GetIntConstant(0); + } + } else { + HInstruction* replacement = new (allocator) HBooleanNot(cond); + cursor->GetBlock()->InsertInstructionBefore(replacement, cursor); + return replacement; + } +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 4f894b07c7..1bd626fe2b 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -371,6 +371,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasTryCatch() const { return has_try_catch_; } void SetHasTryCatch(bool value) { has_try_catch_ = value; } + // Returns an instruction with the opposite boolean value from 'cond'. + // The instruction has been inserted into the graph, either as a constant, or + // before cursor. + HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor); + private: void FindBackEdges(ArenaBitVector* visited); void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; @@ -1096,7 +1101,8 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) #else #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ - M(Arm64IntermediateAddress, Instruction) + M(Arm64IntermediateAddress, Instruction) \ + M(Arm64MultiplyAccumulate, Instruction) #endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) @@ -1626,6 +1632,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { return holder_; } + + bool IsFromInlinedInvoke() const { + return GetParent() != nullptr; + } + private: // Record instructions' use entries of this environment for constant-time removal. // It should only be called by HInstruction when a new environment use is added. 
@@ -3238,7 +3249,7 @@ class HInvoke : public HInstruction { void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache); bool IsFromInlinedInvoke() const { - return GetEnvironment()->GetParent() != nullptr; + return GetEnvironment()->IsFromInlinedInvoke(); } bool CanThrow() const OVERRIDE { return true; } @@ -3652,9 +3663,10 @@ class HInvokeInterface : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeInterface); }; -class HNewInstance : public HExpression<1> { +class HNewInstance : public HExpression<2> { public: - HNewInstance(HCurrentMethod* current_method, + HNewInstance(HInstruction* cls, + HCurrentMethod* current_method, uint32_t dex_pc, uint16_t type_index, const DexFile& dex_file, @@ -3667,7 +3679,8 @@ class HNewInstance : public HExpression<1> { can_throw_(can_throw), finalizable_(finalizable), entrypoint_(entrypoint) { - SetRawInputAt(0, current_method); + SetRawInputAt(0, cls); + SetRawInputAt(1, current_method); } uint16_t GetTypeIndex() const { return type_index_; } @@ -3687,6 +3700,10 @@ class HNewInstance : public HExpression<1> { QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; } + void SetEntrypoint(QuickEntrypointEnum entrypoint) { + entrypoint_ = entrypoint; + } + DECLARE_INSTRUCTION(NewInstance); private: @@ -3694,7 +3711,7 @@ class HNewInstance : public HExpression<1> { const DexFile& dex_file_; const bool can_throw_; const bool finalizable_; - const QuickEntrypointEnum entrypoint_; + QuickEntrypointEnum entrypoint_; DISALLOW_COPY_AND_ASSIGN(HNewInstance); }; @@ -4302,9 +4319,13 @@ class HPhi : public HInstruction { : HInstruction(SideEffects::None(), dex_pc), inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)), reg_number_(reg_number), - type_(type), - is_live_(false), + type_(ToPhiType(type)), + // Phis are constructed live and marked dead if conflicting or unused. + // Individual steps of SsaBuilder should assume that if a phi has been + // marked dead, it can be ignored and will be removed by SsaPhiElimination. + is_live_(true), can_be_null_(true) { + DCHECK_NE(type_, Primitive::kPrimVoid); } // Returns a type equivalent to the given `type`, but that a `HPhi` can hold. 
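The comment above refers to HPhi::ToPhiType, which the new constructor now applies up front. Its effect is the usual sub-word widening; a sketch of the mapping (as generally implemented, not copied from this patch):

  Primitive::Type ToPhiTypeSketch(Primitive::Type type) {
    switch (type) {
      case Primitive::kPrimBoolean:
      case Primitive::kPrimByte:
      case Primitive::kPrimShort:
      case Primitive::kPrimChar:
        return Primitive::kPrimInt;  // sub-int values share one phi type
      default:
        return type;
    }
  }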
@@ -4927,6 +4948,7 @@ class HClinitCheck : public HExpression<1> { return true; } + bool CanThrow() const OVERRIDE { return true; } HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); } diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 885d3a29ee..d07f019c60 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -42,6 +42,40 @@ class HArm64IntermediateAddress : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress); }; +class HArm64MultiplyAccumulate : public HExpression<3> { + public: + HArm64MultiplyAccumulate(Primitive::Type type, + InstructionKind op, + HInstruction* accumulator, + HInstruction* mul_left, + HInstruction* mul_right, + uint32_t dex_pc = kNoDexPc) + : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) { + SetRawInputAt(kInputAccumulatorIndex, accumulator); + SetRawInputAt(kInputMulLeftIndex, mul_left); + SetRawInputAt(kInputMulRightIndex, mul_right); + } + + static constexpr int kInputAccumulatorIndex = 0; + static constexpr int kInputMulLeftIndex = 1; + static constexpr int kInputMulRightIndex = 2; + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_; + } + + InstructionKind GetOpKind() const { return op_kind_; } + + DECLARE_INSTRUCTION(Arm64MultiplyAccumulate); + + private: + // Indicates if this is a MADD or MSUB. + InstructionKind op_kind_; + + DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 2204921c53..dec08d8978 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -110,24 +110,23 @@ class PassScope; class PassObserver : public ValueObject { public: PassObserver(HGraph* graph, - const char* method_name, CodeGenerator* codegen, std::ostream* visualizer_output, CompilerDriver* compiler_driver) : graph_(graph), - method_name_(method_name), + cached_method_name_(), timing_logger_enabled_(compiler_driver->GetDumpPasses()), - timing_logger_(method_name, true, true), + timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true), disasm_info_(graph->GetArena()), visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()), visualizer_(visualizer_output, graph, *codegen), graph_in_bad_state_(false) { if (timing_logger_enabled_ || visualizer_enabled_) { - if (!IsVerboseMethod(compiler_driver, method_name)) { + if (!IsVerboseMethod(compiler_driver, GetMethodName())) { timing_logger_enabled_ = visualizer_enabled_ = false; } if (visualizer_enabled_) { - visualizer_.PrintHeader(method_name_); + visualizer_.PrintHeader(GetMethodName()); codegen->SetDisassemblyInformation(&disasm_info_); } } @@ -135,7 +134,7 @@ class PassObserver : public ValueObject { ~PassObserver() { if (timing_logger_enabled_) { - LOG(INFO) << "TIMINGS " << method_name_; + LOG(INFO) << "TIMINGS " << GetMethodName(); LOG(INFO) << Dumpable<TimingLogger>(timing_logger_); } } @@ -148,6 +147,14 @@ class PassObserver : public ValueObject { void SetGraphInBadState() { graph_in_bad_state_ = true; } + const char* GetMethodName() { + // PrettyMethod() is expensive, so we delay calling it until we actually have to. 
+ if (cached_method_name_.empty()) { + cached_method_name_ = PrettyMethod(graph_->GetMethodIdx(), graph_->GetDexFile()); + } + return cached_method_name_.c_str(); + } + private: void StartPass(const char* pass_name) { // Dump graph first, then start timer. @@ -206,7 +213,8 @@ class PassObserver : public ValueObject { } HGraph* const graph_; - const char* method_name_; + + std::string cached_method_name_; bool timing_logger_enabled_; TimingLogger timing_logger_; @@ -664,7 +672,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, jobject class_loader, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const { - std::string method_name = PrettyMethod(method_idx, dex_file); MaybeRecordStat(MethodCompilationStat::kAttemptCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); InstructionSet instruction_set = compiler_driver->GetInstructionSet(); @@ -728,7 +735,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()); PassObserver pass_observer(graph, - method_name.c_str(), codegen.get(), visualizer_output_.get(), compiler_driver); @@ -756,7 +762,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, interpreter_metadata, dex_cache); - VLOG(compiler) << "Building " << method_name; + VLOG(compiler) << "Building " << pass_observer.GetMethodName(); { PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer); @@ -766,13 +772,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, } } - VLOG(compiler) << "Optimizing " << method_name; + VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName(); if (run_optimizations_) { { PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); if (!graph->TryBuildingSsa()) { // We could not transform the graph to SSA, bailout. - LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop"; + LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName() + << ": it contains a non natural loop"; MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); pass_observer.SetGraphInBadState(); return nullptr; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index f3d075caaa..d1770b75ab 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -48,22 +48,34 @@ void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) { } void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { - // Try to find a static invoke from which this check originated. - HInvokeStaticOrDirect* invoke = nullptr; + // Try to find a static invoke or a new-instance from which this check originated. 
+ HInstruction* implicit_clinit = nullptr; for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); it.Advance()) { HInstruction* user = it.Current()->GetUser(); - if (user->IsInvokeStaticOrDirect() && CanMoveClinitCheck(check, user)) { - invoke = user->AsInvokeStaticOrDirect(); - DCHECK(invoke->IsStaticWithExplicitClinitCheck()); - invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); + if ((user->IsInvokeStaticOrDirect() || user->IsNewInstance()) && + CanMoveClinitCheck(check, user)) { + implicit_clinit = user; + if (user->IsInvokeStaticOrDirect()) { + DCHECK(user->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()); + user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck( + HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); + } else { + DCHECK(user->IsNewInstance()); + // We delegate the initialization duty to the allocation. + if (user->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectInitialized) { + user->AsNewInstance()->SetEntrypoint(kQuickAllocObjectResolved); + } + } break; } } - // If we found a static invoke for merging, remove the check from all other static invokes. - if (invoke != nullptr) { + // If we found a static invoke or new-instance for merging, remove the check + // from dominated static invokes. + if (implicit_clinit != nullptr) { for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); ) { HInstruction* user = it.Current()->GetUser(); - DCHECK(invoke->StrictlyDominates(user)); // All other uses must be dominated. + // All other uses must be dominated. + DCHECK(implicit_clinit->StrictlyDominates(user) || (implicit_clinit == user)); it.Advance(); // Advance before we remove the node, reference to the next node is preserved. if (user->IsInvokeStaticOrDirect()) { user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck( @@ -77,8 +89,8 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { check->ReplaceWith(load_class); - if (invoke != nullptr) { - // Remove the check from the graph. It has been merged into the invoke. + if (implicit_clinit != nullptr) { + // Remove the check from the graph. It has been merged into the invoke or new-instance. check->GetBlock()->RemoveInstruction(check); // Check if we can merge the load class as well. if (can_merge_with_load_class && !load_class->HasUses()) { @@ -92,6 +104,29 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { } } +void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) { + HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass(); + bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse(); + // Change the entrypoint to kQuickAllocObject if either: + // - the class is finalizable (only kQuickAllocObject handles finalizable classes), + // - the class needs access checks (we do not know if it's finalizable), + // - or the load class has only one use. + if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) { + instruction->SetEntrypoint(kQuickAllocObject); + instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0); + // The allocation entry point that deals with access checks does not work with inlined + // methods, so we need to check whether this allocation comes from an inlined method. + if (has_only_one_use && !instruction->GetEnvironment()->IsFromInlinedInvoke()) { + // We can remove the load class from the graph. 
If it needed access checks, we delegate + // the access check to the allocation. + if (load_class->NeedsAccessCheck()) { + instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck); + } + load_class->GetBlock()->RemoveInstruction(load_class); + } + } +} + void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { bool needs_materialization = false; if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) { diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index a70fb309df..9b2434250d 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -40,6 +40,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; + void VisitNewInstance(HNewInstance* instruction) OVERRIDE; bool CanMoveClinitCheck(HInstruction* input, HInstruction* user); diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc index c98f43e461..bde54ee977 100644 --- a/compiler/optimizing/primitive_type_propagation.cc +++ b/compiler/optimizing/primitive_type_propagation.cc @@ -63,7 +63,6 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); phi->ReplaceInput(equivalent, i); if (equivalent->IsPhi()) { - equivalent->AsPhi()->SetLive(); AddToWorklist(equivalent->AsPhi()); } else if (equivalent == input) { // The input has changed its type. It can be an input of other phis, diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 5190eb3b26..9e6cfbe653 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -22,6 +22,13 @@ namespace art { +// Returns whether this is a loop header phi which was eagerly created but later +// found inconsistent due to the vreg being undefined in one of its predecessors. +// Such phi is marked dead and should be ignored until its removal in SsaPhiElimination. +static bool IsUndefinedLoopHeaderPhi(HPhi* phi) { + return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size(); +} + /** * A debuggable application may require to reviving phis, to ensure their * associated DEX register is available to a debugger. This class implements @@ -165,17 +172,15 @@ bool DeadPhiHandling::UpdateType(HPhi* phi) { void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); + if (IsUndefinedLoopHeaderPhi(phi)) { + DCHECK(phi->IsDead()); + continue; + } if (phi->IsDead() && phi->HasEnvironmentUses()) { phi->SetLive(); if (block->IsLoopHeader()) { - // Give a type to the loop phi to guarantee convergence of the algorithm. - // Note that the dead phi may already have a type if it is an equivalent - // generated for a typed LoadLocal. In that case we do not change the - // type because it could lead to an unsupported PrimNot/Float/Double -> - // PrimInt/Long transition and create same type equivalents. - if (phi->GetType() == Primitive::kPrimVoid) { - phi->SetType(phi->InputAt(0)->GetType()); - } + // Loop phis must have a type to guarantee convergence of the algorithm. 
+ DCHECK_NE(phi->GetType(), Primitive::kPrimVoid); AddToWorklist(phi); } else { // Because we are doing a reverse post order visit, all inputs of @@ -220,6 +225,27 @@ void DeadPhiHandling::Run() { ProcessWorklist(); } +void SsaBuilder::SetLoopHeaderPhiInputs() { + for (size_t i = loop_headers_.size(); i > 0; --i) { + HBasicBlock* block = loop_headers_[i - 1]; + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + size_t vreg = phi->GetRegNumber(); + for (HBasicBlock* predecessor : block->GetPredecessors()) { + HInstruction* value = ValueOfLocal(predecessor, vreg); + if (value == nullptr) { + // Vreg is undefined at this predecessor. Mark it dead and leave with + // fewer inputs than predecessors. SsaChecker will fail if not removed. + phi->SetDead(); + break; + } else { + phi->AddInput(value); + } + } + } + } +} + void SsaBuilder::FixNullConstantType() { // The order doesn't matter here. for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) { @@ -283,15 +309,7 @@ void SsaBuilder::BuildSsa() { } // 2) Set inputs of loop phis. - for (HBasicBlock* block : loop_headers_) { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->AsPhi(); - for (HBasicBlock* predecessor : block->GetPredecessors()) { - HInstruction* input = ValueOfLocal(predecessor, phi->GetRegNumber()); - phi->AddInput(input); - } - } - } + SetLoopHeaderPhiInputs(); // 3) Mark dead phis. This will mark phis that are only used by environments: // at the DEX level, the type of these phis does not need to be consistent, but @@ -403,8 +421,13 @@ ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) { for (size_t i = 0; i < vregs; ++i) { // No point in creating the catch phi if it is already undefined at // the first throwing instruction. 
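An example of the undefined case handled by SetLoopHeaderPhiInputs above, in hand-written dex-style pseudocode (illustrative; v0 is only defined inside the loop):

  //   entry:    goto header            // v0 is undefined along this edge
  //   header:   if-eqz v1, exit        // eager phi for v0 created here
  //   body:     const v0, #42
  //             goto header            // back edge: v0 defined
  //
  // The header phi for v0 receives a value from the back edge but none from
  // entry, so it ends up with fewer inputs than predecessors and is SetDead().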
- if ((*current_locals_)[i] != nullptr) { - HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid); + HInstruction* current_local_value = (*current_locals_)[i]; + if (current_local_value != nullptr) { + HPhi* phi = new (arena) HPhi( + arena, + i, + 0, + current_local_value->GetType()); block->AddPhi(phi); (*locals)[i] = phi; } @@ -451,7 +474,10 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local); if (incoming != nullptr) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( - GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid); + GetGraph()->GetArena(), + local, + 0, + incoming->GetType()); block->AddPhi(phi); (*current_locals_)[local] = phi; } @@ -484,8 +510,12 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { } if (is_different) { + HInstruction* first_input = ValueOfLocal(block->GetPredecessors()[0], local); HPhi* phi = new (GetGraph()->GetArena()) HPhi( - GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid); + GetGraph()->GetArena(), + local, + block->GetPredecessors().size(), + first_input->GetType()); for (size_t i = 0; i < block->GetPredecessors().size(); i++) { HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local); phi->SetRawInputAt(i, pred_value); @@ -583,8 +613,16 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive: phi->GetBlock()->InsertPhiAfter(new_phi, phi); return new_phi; } else { - DCHECK_EQ(next->GetType(), type); - return next->AsPhi(); + HPhi* next_phi = next->AsPhi(); + DCHECK_EQ(next_phi->GetType(), type); + if (next_phi->IsDead()) { + // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis) + // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This + // cannot revive undefined loop header phis because they cannot have uses. + DCHECK(!IsUndefinedLoopHeaderPhi(next_phi)); + next_phi->SetLive(); + } + return next_phi; } } @@ -638,7 +676,36 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { } void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { - (*current_locals_)[store->GetLocal()->GetRegNumber()] = store->InputAt(1); + uint32_t reg_number = store->GetLocal()->GetRegNumber(); + HInstruction* stored_value = store->InputAt(1); + Primitive::Type stored_type = stored_value->GetType(); + DCHECK_NE(stored_type, Primitive::kPrimVoid); + + // Storing into vreg `reg_number` may implicitly invalidate the surrounding + // registers. Consider the following cases: + // (1) Storing a wide value must overwrite previous values in both `reg_number` + // and `reg_number+1`. We store `nullptr` in `reg_number+1`. + // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number` + // must invalidate it. We store `nullptr` in `reg_number-1`. + // Consequently, storing a wide value into the high vreg of another wide value + // will invalidate both `reg_number-1` and `reg_number+1`. + + if (reg_number != 0) { + HInstruction* local_low = (*current_locals_)[reg_number - 1]; + if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) { + // The vreg we are storing into was previously the high vreg of a pair. + // We need to invalidate its low vreg. + DCHECK((*current_locals_)[reg_number] == nullptr); + (*current_locals_)[reg_number - 1] = nullptr; + } + } + + (*current_locals_)[reg_number] = stored_value; + if (Primitive::Is64BitType(stored_type)) { + // We are storing a pair. Invalidate the instruction in the high vreg. 
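A worked example of the two invalidation rules above (recall that a wide value sits in its low vreg's slot, with nullptr in the high slot, as the DCHECK asserts):

  // Before:  v0 = W (wide), v1 = nullptr (high half of W), v2 = I (int)
  // StoreLocal(v1, J) where J is wide, i.e. J occupies (v1, v2):
  //   rule (2): v0 held a pair whose high vreg is v1  ->  v0 := nullptr
  //   rule (1): J's own high half claims v2           ->  v2 := nullptr
  // After:   v0 = nullptr, v1 = J, v2 = nullptr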
+    (*current_locals_)[reg_number + 1] = nullptr;
+  }
+
   store->GetBlock()->RemoveInstruction(store);
 }
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 79f1a28ac8..dcce5e4c2c 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -81,6 +81,7 @@ class SsaBuilder : public HGraphVisitor {
   static constexpr const char* kSsaBuilderPassName = "ssa_builder";
 
  private:
+  void SetLoopHeaderPhiInputs();
   void FixNullConstantType();
   void EquivalentPhisCleanup();
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 72f9ddd506..a3219dcc38 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -16,6 +16,8 @@
 
 #include "ssa_phi_elimination.h"
 
+#include "base/arena_containers.h"
+
 namespace art {
 
 void SsaDeadPhiElimination::Run() {
@@ -24,22 +26,36 @@ void SsaDeadPhiElimination::Run() {
 }
 
 void SsaDeadPhiElimination::MarkDeadPhis() {
+  // Phis are constructed live and should not be revived if previously marked
+  // dead. This algorithm temporarily breaks that invariant but we DCHECK that
+  // only phis which were initially live are revived.
+  ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter());
+
   // Add to the worklist phis referenced by non-phi instructions.
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
       HPhi* phi = inst_it.Current()->AsPhi();
-      // Set dead ahead of running through uses. The phi may have no use.
-      phi->SetDead();
+      if (phi->IsDead()) {
+        continue;
+      }
+
+      bool has_non_phi_use = false;
       for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
-        HUseListNode<HInstruction*>* current = use_it.Current();
-        HInstruction* user = current->GetUser();
-        if (!user->IsPhi()) {
-          worklist_.push_back(phi);
-          phi->SetLive();
+        if (!use_it.Current()->GetUser()->IsPhi()) {
+          has_non_phi_use = true;
           break;
         }
       }
+
+      if (has_non_phi_use) {
+        worklist_.push_back(phi);
+      } else {
+        phi->SetDead();
+        if (kIsDebugBuild) {
+          initially_live.insert(phi);
+        }
+      }
     }
   }
 
@@ -48,10 +64,13 @@ void SsaDeadPhiElimination::MarkDeadPhis() {
     HPhi* phi = worklist_.back();
     worklist_.pop_back();
     for (HInputIterator it(phi); !it.Done(); it.Advance()) {
-      HInstruction* input = it.Current();
-      if (input->IsPhi() && input->AsPhi()->IsDead()) {
-        worklist_.push_back(input->AsPhi());
-        input->AsPhi()->SetLive();
+      HPhi* input = it.Current()->AsPhi();
+      if (input != nullptr && input->IsDead()) {
+        // Input is a dead phi. Revive it and add to the worklist. We make sure
+        // that the phi was not dead initially (see definition of `initially_live`).
+        DCHECK(ContainsElement(initially_live, input));
+        input->SetLive();
+        worklist_.push_back(input);
       }
     }
   }
@@ -118,7 +137,6 @@ void SsaRedundantPhiElimination::Run() {
     }
 
     if (phi->InputCount() == 0) {
-      DCHECK(phi->IsCatchPhi());
       DCHECK(phi->IsDead());
       continue;
     }
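To make the revival invariant in MarkDeadPhis above concrete, a short trace over a hypothetical phi chain:

  // Suppose phi1 has a non-phi user, phi1's input is phi2, phi2's is phi3,
  // and phi0 was already marked dead by SsaBuilder (undefined loop header phi).
  // Marking:  phi1 goes on the worklist; phi2 and phi3 are SetDead() and
  //           recorded in `initially_live`; phi0 is skipped by the IsDead() check.
  // Worklist: phi1 revives phi2 (the DCHECK passes: phi2 is in `initially_live`),
  //           then phi2 revives phi3. phi0 can never be revived.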