Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc       | 239
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h        |  10
-rw-r--r--  compiler/optimizing/induction_var_analysis.cc        |  37
-rw-r--r--  compiler/optimizing/induction_var_analysis.h         |   7
-rw-r--r--  compiler/optimizing/induction_var_analysis_test.cc   |  25
-rw-r--r--  compiler/optimizing/induction_var_range.h            |  10
-rw-r--r--  compiler/optimizing/loop_optimization.cc             | 149
-rw-r--r--  compiler/optimizing/loop_optimization.h              |   4
-rw-r--r--  compiler/optimizing/nodes.h                          |   7
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc           |   2
10 files changed, 384 insertions, 106 deletions
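For orientation before the diff: the new `VisitCompare` lowering below materializes the three-way result that `HCompare` defines (-1, 0, 1). The following is a minimal C++ sketch of the value the generated 64-bit sequence computes, mirroring its signed compare on the high words and unsigned compare on the low words; the helper name `CompareLongReference` is illustrative and not part of the patch.

#include <cstdint>

// Reference semantics only: -1 if lhs < rhs, 0 if equal, +1 if lhs > rhs.
// The long case mirrors the emitted ARM sequence: a signed compare on the
// high words decides first; if the high words are equal, an unsigned
// compare on the low words decides (hence the `lo` condition below).
static int CompareLongReference(int64_t lhs, int64_t rhs) {
  int32_t lhs_hi = static_cast<int32_t>(lhs >> 32);
  int32_t rhs_hi = static_cast<int32_t>(rhs >> 32);
  if (lhs_hi != rhs_hi) {
    return lhs_hi < rhs_hi ? -1 : 1;  // Signed compare on high words.
  }
  uint32_t lhs_lo = static_cast<uint32_t>(lhs);
  uint32_t rhs_lo = static_cast<uint32_t>(rhs);
  if (lhs_lo == rhs_lo) {
    return 0;
  }
  return lhs_lo < rhs_lo ? -1 : 1;  // Unsigned compare on low words.
}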
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 32287a0f2a..cac0543da3 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -37,6 +37,7 @@ namespace arm {
 namespace vixl32 = vixl::aarch32;
 using namespace vixl32;  // NOLINT(build/namespaces)
 
+using helpers::DRegisterFrom;
 using helpers::DWARFReg;
 using helpers::FromLowSToD;
 using helpers::HighDRegisterFrom;
@@ -488,6 +489,15 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
       isa_features_(isa_features) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
+  // Give d14 and d15 as scratch registers to VIXL.
+  // They are removed from the register allocator in `SetupBlockedRegisters()`.
+  // TODO(VIXL): We need two scratch D registers for `EmitSwap` when swapping two double stack
+  // slots. If that is sufficiently rare, and we have pressure on FP registers, we could instead
+  // spill in `EmitSwap`. But if we actually are guaranteed to have 32 D registers, we could give
+  // d30 and d31 to VIXL to avoid removing registers from the allocator. If that is the case, we may
+  // also want to investigate giving those 14 other D registers to the allocator.
+  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d14);
+  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15);
 }
 
 #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->
@@ -509,6 +519,13 @@ void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
   // Reserve temp register.
   blocked_core_registers_[IP] = true;
 
+  // Registers s28-s31 (d14-d15) are left to VIXL for scratch registers.
+  // (They are given to the `MacroAssembler` in `CodeGeneratorARMVIXL::CodeGeneratorARMVIXL`.)
+  blocked_fpu_registers_[28] = true;
+  blocked_fpu_registers_[29] = true;
+  blocked_fpu_registers_[30] = true;
+  blocked_fpu_registers_[31] = true;
+
   if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
@@ -1331,6 +1348,26 @@ void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant
   // Will be generated at use site.
 }
 
+void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
@@ -2161,6 +2198,92 @@ void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) {
   }
 }
 
+void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      // Output overlaps because it is written before doing the low comparison.
+      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
+  LocationSummary* locations = compare->GetLocations();
+  vixl32::Register out = OutputRegister(compare);
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+
+  vixl32::Label less, greater, done;
+  Primitive::Type type = compare->InputAt(0)->GetType();
+  vixl32::Condition less_cond = vixl32::Condition(kNone);
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags.
+      __ Mov(out, 0);
+      __ Cmp(RegisterFrom(left), RegisterFrom(right));  // Signed compare.
+      less_cond = lt;
+      break;
+    }
+    case Primitive::kPrimLong: {
+      __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));  // Signed compare.
+      __ B(lt, &less);
+      __ B(gt, &greater);
+      // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
+      __ Mov(out, 0);
+      __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));  // Unsigned compare.
+      less_cond = lo;
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      __ Mov(out, 0);
+      GenerateVcmp(compare);
+      // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
+      __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+      less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected compare type " << type;
+      UNREACHABLE();
+  }
+
+  __ B(eq, &done);
+  __ B(less_cond, &less);
+
+  __ Bind(&greater);
+  __ Mov(out, 1);
+  __ B(&done);
+
+  __ Bind(&less);
+  __ Mov(out, -1);
+
+  __ Bind(&done);
+}
+
 void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -2648,6 +2771,16 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
   }
 }
 
+Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) {
+  DCHECK(Primitive::IsFloatingPointType(input->GetType())) << input->GetType();
+  if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
+      (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
+    return Location::ConstantLocation(input->AsConstant());
+  } else {
+    return Location::RequiresFpuRegister();
+  }
+}
+
 void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
                                              const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
@@ -2989,9 +3122,27 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
       GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
     }
   } else if (source.IsFpuRegister()) {
-    TODO_VIXL32(FATAL);
+    if (destination.IsRegister()) {
+      TODO_VIXL32(FATAL);
+    } else if (destination.IsFpuRegister()) {
+      __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
+    } else {
+      DCHECK(destination.IsStackSlot());
+      GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
+    }
   } else if (source.IsDoubleStackSlot()) {
-    TODO_VIXL32(FATAL);
+    if (destination.IsDoubleStackSlot()) {
+      vixl32::DRegister temp = temps.AcquireD();
+      GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex());
+      GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex());
+    } else if (destination.IsRegisterPair()) {
+      DCHECK(ExpectedPairLayout(destination));
+      GetAssembler()->LoadFromOffset(
+          kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex());
+    } else {
+      DCHECK(destination.IsFpuRegisterPair()) << destination;
+      GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex());
+    }
   } else if (source.IsRegisterPair()) {
     if (destination.IsRegisterPair()) {
       __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source));
@@ -3009,7 +3160,14 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
                                     destination.GetStackIndex());
     }
   } else if (source.IsFpuRegisterPair()) {
-    TODO_VIXL32(FATAL);
+    if (destination.IsRegisterPair()) {
+      TODO_VIXL32(FATAL);
+    } else if (destination.IsFpuRegisterPair()) {
+      __ Vmov(DRegisterFrom(destination), DRegisterFrom(source));
+    } else {
+      DCHECK(destination.IsDoubleStackSlot()) << destination;
+      GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex());
+    }
   } else {
     DCHECK(source.IsConstant()) << source;
     HConstant* constant = source.GetConstant();
@@ -3070,18 +3228,77 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
   }
 }
 
-void ParallelMoveResolverARMVIXL::Exchange(Register reg ATTRIBUTE_UNUSED,
-                                           int mem ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
+  UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+  vixl32::Register temp = temps.Acquire();
+  __ Mov(temp, reg);
+  GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem);
+  GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
 }
 
-void ParallelMoveResolverARMVIXL::Exchange(int mem1 ATTRIBUTE_UNUSED,
-                                           int mem2 ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
+  // TODO(VIXL32): Double check the performance of this implementation.
+  UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+  vixl32::Register temp = temps.Acquire();
+  vixl32::SRegister temp_s = temps.AcquireS();
+
+  __ Ldr(temp, MemOperand(sp, mem1));
+  __ Vldr(temp_s, MemOperand(sp, mem2));
+  __ Str(temp, MemOperand(sp, mem2));
+  __ Vstr(temp_s, MemOperand(sp, mem1));
 }
 
-void ParallelMoveResolverARMVIXL::EmitSwap(size_t index ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
+  MoveOperands* move = moves_[index];
+  Location source = move->GetSource();
+  Location destination = move->GetDestination();
+  UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+
+  if (source.IsRegister() && destination.IsRegister()) {
+    vixl32::Register temp = temps.Acquire();
+    DCHECK(!RegisterFrom(source).Is(temp));
+    DCHECK(!RegisterFrom(destination).Is(temp));
+    __ Mov(temp, RegisterFrom(destination));
+    __ Mov(RegisterFrom(destination), RegisterFrom(source));
+    __ Mov(RegisterFrom(source), temp);
+  } else if (source.IsRegister() && destination.IsStackSlot()) {
+    Exchange(RegisterFrom(source), destination.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsRegister()) {
+    Exchange(RegisterFrom(destination), source.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsStackSlot()) {
+    TODO_VIXL32(FATAL);
+  } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
+    TODO_VIXL32(FATAL);
+  } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
+    vixl32::DRegister temp = temps.AcquireD();
+    __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source));
+    __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination));
+    __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination));
+    __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp);
+  } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
+    vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination);
+    int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex();
+    DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
+    vixl32::DRegister temp = temps.AcquireD();
+    __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1));
+    GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
+    GetAssembler()->StoreDToOffset(temp, sp, mem);
+  } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
+    TODO_VIXL32(FATAL);
+  } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
+    TODO_VIXL32(FATAL);
+  } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
+    TODO_VIXL32(FATAL);
+  } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
+    vixl32::DRegister temp1 = temps.AcquireD();
+    vixl32::DRegister temp2 = temps.AcquireD();
+    __ Vldr(temp1, MemOperand(sp, source.GetStackIndex()));
+    __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex()));
+    __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex()));
+    __ Vstr(temp2, MemOperand(sp, source.GetStackIndex()));
+  } else {
+    LOG(FATAL) << "Unimplemented" << source << " <-> " << destination;
+  }
 }
 
 void ParallelMoveResolverARMVIXL::SpillScratch(int reg ATTRIBUTE_UNUSED) {
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index c749f8620a..1cd6184dd4 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -110,11 +110,14 @@ class LoadClassSlowPathARMVIXL;
   M(BelowOrEqual) \
   M(ClearException) \
   M(ClinitCheck) \
+  M(Compare) \
   M(CurrentMethod) \
   M(Div) \
   M(DivZeroCheck) \
+  M(DoubleConstant) \
   M(Equal) \
   M(Exit) \
+  M(FloatConstant) \
   M(Goto) \
   M(GreaterThan) \
   M(GreaterThanOrEqual) \
@@ -161,10 +164,7 @@ class LoadClassSlowPathARMVIXL;
   M(BoundType) \
   M(CheckCast) \
   M(ClassTableGet) \
-  M(Compare) \
   M(Deoptimize) \
-  M(DoubleConstant) \
-  M(FloatConstant) \
   M(InstanceOf) \
   M(InvokeInterface) \
   M(InvokeUnresolved) \
@@ -246,7 +246,7 @@ class ParallelMoveResolverARMVIXL : public ParallelMoveResolverWithSwap {
   ArmVIXLAssembler* GetAssembler() const;
 
  private:
-  void Exchange(Register reg, int mem);
+  void Exchange(vixl32::Register reg, int mem);
   void Exchange(int mem1, int mem2);
 
   CodeGeneratorARMVIXL* const codegen_;
@@ -280,6 +280,8 @@ class LocationsBuilderARMVIXL : public HGraphVisitor {
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
+  Location ArithmeticZeroOrFpuRegister(HInstruction* input);
+
   CodeGeneratorARMVIXL* const codegen_;
   InvokeDexCallingConventionVisitorARM parameter_visitor_;
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 4bea3c362b..f2602fbf8c 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -23,12 +23,12 @@ namespace art {
  * Since graph traversal may enter a SCC at any position, an initial representation may be rotated,
  * along dependences, viz. any of (a, b, c, d), (d, a, b, c) (c, d, a, b), (b, c, d, a) assuming
  * a chain of dependences (mutual independent items may occur in arbitrary order). For proper
- * classification, the lexicographically first entry-phi is rotated to the front.
+ * classification, the lexicographically first loop-phi is rotated to the front.
  */
 static void RotateEntryPhiFirst(HLoopInformation* loop,
                                 ArenaVector<HInstruction*>* scc,
                                 ArenaVector<HInstruction*>* new_scc) {
-  // Find very first entry-phi.
+  // Find very first loop-phi.
   const HInstructionList& phis = loop->GetHeader()->GetPhis();
   HInstruction* phi = nullptr;
   size_t phi_pos = -1;
@@ -41,7 +41,7 @@ static void RotateEntryPhiFirst(HLoopInformation* loop,
     }
   }
 
-  // If found, bring that entry-phi to front.
+  // If found, bring that loop-phi to front.
   if (phi != nullptr) {
     new_scc->clear();
     for (size_t i = 0; i < size; i++) {
@@ -94,7 +94,9 @@ HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph)
           graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
       type_(Primitive::kPrimVoid),
       induction_(std::less<HLoopInformation*>(),
-                 graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) {
+                 graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
+      cycles_(std::less<HPhi*>(),
+              graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) {
 }
 
 void HInductionVarAnalysis::Run() {
@@ -244,13 +246,13 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) {
   const size_t size = scc_.size();
   DCHECK_GE(size, 1u);
 
-  // Rotate proper entry-phi to front.
+  // Rotate proper loop-phi to front.
   if (size > 1) {
     ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis));
     RotateEntryPhiFirst(loop, &scc_, &other);
   }
 
-  // Analyze from entry-phi onwards.
+  // Analyze from loop-phi onwards.
   HInstruction* phi = scc_[0];
   if (!phi->IsLoopHeaderPhi()) {
     return;
@@ -262,6 +264,9 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) {
     return;
   }
 
+  // Store interesting cycle.
+  AssignCycle(phi->AsPhi());
+
   // Singleton is wrap-around induction if all internal links have the same meaning.
   if (size == 1) {
     InductionInfo* update = TransferPhi(loop, phi, /* input_index */ 1);
@@ -365,6 +370,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferAddSub(Indu
   // can be combined with an invariant to yield a similar result. Even two linear inputs can
   // be combined. All other combinations fail, however.
   if (a != nullptr && b != nullptr) {
+    type_ = Narrowest(type_, Narrowest(a->type, b->type));
     if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
       return CreateInvariantOp(op, a, b);
     } else if (a->induction_class == kLinear && b->induction_class == kLinear) {
@@ -401,6 +407,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferMul(Inducti
   // can be multiplied with an invariant to yield a similar but multiplied result.
   // Two non-invariant inputs cannot be multiplied, however.
   if (a != nullptr && b != nullptr) {
+    type_ = Narrowest(type_, Narrowest(a->type, b->type));
     if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
       return CreateInvariantOp(kMul, a, b);
     } else if (a->induction_class == kInvariant) {
@@ -441,6 +448,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferNeg(Inducti
   // Transfer over a unary negation: an invariant, linear, wrap-around, or periodic input
   // yields a similar but negated induction as result.
   if (a != nullptr) {
+    type_ = Narrowest(type_, a->type);
     if (a->induction_class == kInvariant) {
       return CreateInvariantOp(kNeg, nullptr, a);
     }
@@ -940,6 +948,23 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv
   return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, b->type);
 }
 
+
+void HInductionVarAnalysis::AssignCycle(HPhi* phi) {
+  ArenaSet<HInstruction*>* set = &cycles_.Put(phi, ArenaSet<HInstruction*>(
+      graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)))->second;
+  for (HInstruction* i : scc_) {
+    set->insert(i);
+  }
+}
+
+ArenaSet<HInstruction*>* HInductionVarAnalysis::LookupCycle(HPhi* phi) {
+  auto it = cycles_.find(phi);
+  if (it != cycles_.end()) {
+    return &it->second;
+  }
+  return nullptr;
+}
+
 bool HInductionVarAnalysis::IsExact(InductionInfo* info, int64_t* value) {
   return InductionVarRange(this).IsConstant(info, InductionVarRange::kExact, value);
 }
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index d19078248c..70271799d2 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -214,6 +214,8 @@ class HInductionVarAnalysis : public HOptimization {
   InductionInfo* LookupInfo(HLoopInformation* loop, HInstruction* instruction);
   InductionInfo* CreateConstant(int64_t value, Primitive::Type type);
   InductionInfo* CreateSimplifiedInvariant(InductionOp op, InductionInfo* a, InductionInfo* b);
+  void AssignCycle(HPhi* phi);
+  ArenaSet<HInstruction*>* LookupCycle(HPhi* phi);
 
   // Constants.
   bool IsExact(InductionInfo* info, /*out*/ int64_t* value);
@@ -240,6 +242,11 @@ class HInductionVarAnalysis : public HOptimization {
    */
   ArenaSafeMap<HLoopInformation*, ArenaSafeMap<HInstruction*, InductionInfo*>> induction_;
 
+  /**
+   * Preserves induction cycle information for each loop-phi.
+   */
+  ArenaSafeMap<HPhi*, ArenaSet<HInstruction*>> cycles_;
+
   friend class InductionVarAnalysisTest;
   friend class InductionVarRange;
   friend class InductionVarRangeTest;
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 7599c8fd79..031f1d74a8 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -740,6 +740,31 @@ TEST_F(InductionVarAnalysisTest, ByteInductionIntLoopControl) {
   EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", GetTripCount(0).c_str());
 }
 
+TEST_F(InductionVarAnalysisTest, ByteInductionDerivedIntLoopControl) {
+  // Setup:
+  // for (int i = 0; i < 100; i++) {
+  //   k = (byte) i;
+  //   a[k] = 0;
+  //   k = k + 1
+  //   a[k] = 0;
+  // }
+  BuildLoopNest(1);
+  HInstruction* conv = InsertInstruction(
+      new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0);
+  HInstruction* store1 = InsertArrayStore(conv, 0);
+  HInstruction* add = InsertInstruction(
+      new (&allocator_) HAdd(Primitive::kPrimInt, conv, constant1_), 0);
+  HInstruction* store2 = InsertArrayStore(add, 0);
+
+  PerformInductionVarAnalysis();
+
+  // Byte induction (k) is "transferred" over conversion into addition (k + 1).
+  // This means only values within byte range can be trusted (even though
+  // addition can jump out of the range of course).
+  EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str());
+  EXPECT_STREQ("((1) * i + (1)):PrimByte", GetInductionInfo(store2->InputAt(1), 0).c_str());
+}
+
 TEST_F(InductionVarAnalysisTest, ByteLoopControl1) {
   // Setup:
   // for (byte i = -128; i < 127; i++) {  // just fits!
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 2f70046a27..034cf32b2d 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -136,10 +136,20 @@ class InductionVarRange {
    */
   void ReVisit(HLoopInformation* loop) {
     induction_analysis_->induction_.erase(loop);
+    for (HInstructionIterator it(loop->GetHeader()->GetPhis()); !it.Done(); it.Advance()) {
+      induction_analysis_->cycles_.erase(it.Current()->AsPhi());
+    }
     induction_analysis_->VisitLoop(loop);
   }
 
   /**
+   * Lookup an interesting cycle associated with an entry phi.
+   */
+  ArenaSet<HInstruction*>* LookupCycle(HPhi* phi) const {
+    return induction_analysis_->LookupCycle(phi);
+  }
+
+  /**
    * Checks if header logic of a loop terminates.
    */
   bool IsFinite(HLoopInformation* loop) const;
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index b88e73b979..51be1d1e91 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -20,82 +20,6 @@
 
 namespace art {
 
-// Detects a potential induction cycle. Note that the actual induction
-// information is queried later if its last value is really needed.
-static bool IsPhiInduction(HPhi* phi, ArenaSet<HInstruction*>* iset) {
-  DCHECK(iset->empty());
-  HInputsRef inputs = phi->GetInputs();
-  if (inputs.size() == 2) {
-    HLoopInformation* loop_info = phi->GetBlock()->GetLoopInformation();
-    HInstruction* op = inputs[1];
-    if (op->GetBlock()->GetLoopInformation() == loop_info) {
-      // Chase a simple chain back to phi.
-      while (!op->IsPhi()) {
-        // Binary operation with single use in same loop.
-        if (!op->IsBinaryOperation() || !op->GetUses().HasExactlyOneElement()) {
-          return false;
-        }
-        // Chase back either through left or right operand.
-        iset->insert(op);
-        HInstruction* a = op->InputAt(0);
-        HInstruction* b = op->InputAt(1);
-        if (a->GetBlock()->GetLoopInformation() == loop_info && b != phi) {
-          op = a;
-        } else if (b->GetBlock()->GetLoopInformation() == loop_info) {
-          op = b;
-        } else {
-          return false;
-        }
-      }
-      // Closed the cycle?
-      if (op == phi) {
-        iset->insert(phi);
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-// Find: phi: Phi(init, addsub)
-//       s:   SuspendCheck
-//       c:   Condition(phi, bound)
-//       i:   If(c)
-// TODO: Find a less pattern matching approach?
-static bool IsEmptyHeader(HBasicBlock* block, ArenaSet<HInstruction*>* iset) {
-  DCHECK(iset->empty());
-  HInstruction* phi = block->GetFirstPhi();
-  if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi(), iset)) {
-    HInstruction* s = block->GetFirstInstruction();
-    if (s != nullptr && s->IsSuspendCheck()) {
-      HInstruction* c = s->GetNext();
-      if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) {
-        HInstruction* i = c->GetNext();
-        if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
-          iset->insert(c);
-          iset->insert(s);
-          return true;
-        }
-      }
-    }
-  }
-  return false;
-}
-
-// Does the loop-body consist of induction cycle and direct control flow only?
-static bool IsEmptyBody(HBasicBlock* block, ArenaSet<HInstruction*>* iset) {
-  if (block->GetFirstPhi() == nullptr) {
-    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-      HInstruction* instruction = it.Current();
-      if (!instruction->IsGoto() && iset->find(instruction) == iset->end()) {
-        return false;
-      }
-    }
-    return true;
-  }
-  return false;
-}
-
 // Remove the instruction from the graph. A bit more elaborate than the usual
 // instruction removal, since there may be a cycle in the use structure.
 static void RemoveFromCycle(HInstruction* instruction) {
@@ -242,7 +166,7 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) {
     HPhi* phi = it.Current()->AsPhi();
     iset_->clear();
     int32_t use_count = 0;
-    if (IsPhiInduction(phi, iset_) &&
+    if (IsPhiInduction(phi) &&
         IsOnlyUsedAfterLoop(node->loop_info, phi, &use_count) &&
         TryReplaceWithLastValue(phi, use_count, preheader)) {
       for (HInstruction* i : *iset_) {
@@ -256,15 +180,14 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) {
 void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
   for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    // Remove instructions that are dead, usually resulting from eliminating induction cycles.
+    // Remove instructions that are dead.
     for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) {
       HInstruction* instruction = i.Current();
       if (instruction->IsDeadAndRemovable()) {
        block->RemoveInstruction(instruction);
      }
    }
-    // Remove trivial control flow blocks from the loop-body, again usually resulting
-    // from eliminating induction cycles.
+    // Remove trivial control flow blocks from the loop-body.
     if (block->GetPredecessors().size() == 1 &&
         block->GetSuccessors().size() == 1 &&
         block->GetFirstInstruction()->IsGoto()) {
@@ -314,8 +237,8 @@ void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) {
   // subsequent index uses, if any, with the last value and remove the loop.
   iset_->clear();
   int32_t use_count = 0;
-  if (IsEmptyHeader(header, iset_) &&
-      IsEmptyBody(body, iset_) &&
+  if (IsEmptyHeader(header) &&
+      IsEmptyBody(body) &&
       IsOnlyUsedAfterLoop(node->loop_info, header->GetFirstPhi(), &use_count) &&
       TryReplaceWithLastValue(header->GetFirstPhi(), use_count, preheader)) {
     body->DisconnectAndDelete();
@@ -333,6 +256,68 @@ void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) {
   }
 }
 
+bool HLoopOptimization::IsPhiInduction(HPhi* phi) {
+  ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
+  if (set != nullptr) {
+    for (HInstruction* i : *set) {
+      // Check that, other than phi, instruction are removable with uses contained in the cycle.
+      // TODO: investigate what cases are no longer in the graph.
+      if (i != phi) {
+        if (!i->IsInBlock() || !i->IsRemovable()) {
+          return false;
+        }
+        for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
+          if (set->find(use.GetUser()) == set->end()) {
+            return false;
+          }
+        }
+      }
+    }
+    DCHECK(iset_->empty());
+    iset_->insert(set->begin(), set->end());  // copy
+    return true;
+  }
+  return false;
+}
+
+// Find: phi: Phi(init, addsub)
+//       s:   SuspendCheck
+//       c:   Condition(phi, bound)
+//       i:   If(c)
+// TODO: Find a less pattern matching approach?
+bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) {
+  DCHECK(iset_->empty());
+  HInstruction* phi = block->GetFirstPhi();
+  if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi())) {
+    HInstruction* s = block->GetFirstInstruction();
+    if (s != nullptr && s->IsSuspendCheck()) {
+      HInstruction* c = s->GetNext();
+      if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) {
+        HInstruction* i = c->GetNext();
+        if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
+          iset_->insert(c);
+          iset_->insert(s);
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
+bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) {
+  if (block->GetFirstPhi() == nullptr) {
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) {
+        return false;
+      }
+    }
+    return true;
+  }
+  return false;
+}
+
 bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
                                             HInstruction* instruction,
                                             /*out*/ int32_t* use_count) {
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 4113357035..e18d17531e 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -64,6 +64,10 @@ class HLoopOptimization : public HOptimization {
   void SimplifyBlocks(LoopNode* node);
   void RemoveIfEmptyInnerLoop(LoopNode* node);
 
+  bool IsPhiInduction(HPhi* phi);
+  bool IsEmptyHeader(HBasicBlock* block);
+  bool IsEmptyBody(HBasicBlock* block);
+
   bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
                            HInstruction* instruction,
                            /*out*/ int32_t* use_count);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 710f709c89..6a45149509 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1945,7 +1945,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
     return !HasEnvironmentUses() && GetUses().HasExactlyOneElement();
   }
 
-  bool IsDeadAndRemovable() const {
+  bool IsRemovable() const {
     return
         !HasSideEffects() &&
         !CanThrow() &&
@@ -1953,11 +1953,14 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
         !IsControlFlow() &&
         !IsNativeDebugInfo() &&
         !IsParameterValue() &&
-        !HasUses() &&
         // If we added an explicit barrier then we should keep it.
         !IsMemoryBarrier();
   }
 
+  bool IsDeadAndRemovable() const {
+    return IsRemovable() && !HasUses();
+  }
+
   // Does this instruction strictly dominate `other_instruction`?
   // Returns false if this instruction and `other_instruction` are the same.
   // Aborts if this instruction and `other_instruction` are both phis.
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index bacf9940ca..013e110b87 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -52,7 +52,7 @@ class OptimizingCFITest : public CFITest {
   void SetUpFrame(InstructionSet isa) {
     // Setup simple context.
     std::string error;
-    isa_features_.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
+    isa_features_ = InstructionSetFeatures::FromVariant(isa, "default", &error);
     graph_ = CreateGraph(&allocator_);
     // Generate simple frame with some spills.
     code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_);
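Usage note (illustrative only, not taken from the patch): with the induction cycles now recorded per loop-phi by HInductionVarAnalysis and queried through InductionVarRange::LookupCycle, HLoopOptimization can recognize a loop of the following shape as empty, since everything in the body belongs to the phi's recorded induction cycle and the phi's only use is after the loop; the function name and source form below are hypothetical.

// Shape of source loop that RemoveIfEmptyInnerLoop targets (illustrative).
int CountUpTo(int n) {
  int i = 0;
  for (; i < n; i++) {
    // Empty body: only the induction update (i = i + 1), the suspend check,
    // the loop condition, and the if/goto remain, all of which fall inside
    // the recorded cycle checked by IsEmptyHeader()/IsEmptyBody().
  }
  return i;  // Sole use after the loop; rewritten to the loop's last value.
}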