Diffstat (limited to 'compiler/optimizing')
 compiler/optimizing/code_generator_arm_vixl.cc      | 239
 compiler/optimizing/code_generator_arm_vixl.h       |  10
 compiler/optimizing/induction_var_analysis.cc       |  37
 compiler/optimizing/induction_var_analysis.h        |   7
 compiler/optimizing/induction_var_analysis_test.cc  |  25
 compiler/optimizing/induction_var_range.h           |  10
 compiler/optimizing/loop_optimization.cc            | 149
 compiler/optimizing/loop_optimization.h             |   4
 compiler/optimizing/nodes.h                         |   7
 compiler/optimizing/optimizing_cfi_test.cc          |   2
 10 files changed, 384 insertions(+), 106 deletions(-)
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 32287a0f2a..cac0543da3 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -37,6 +37,7 @@ namespace arm {
namespace vixl32 = vixl::aarch32;
using namespace vixl32; // NOLINT(build/namespaces)
+using helpers::DRegisterFrom;
using helpers::DWARFReg;
using helpers::FromLowSToD;
using helpers::HighDRegisterFrom;
@@ -488,6 +489,15 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
isa_features_(isa_features) {
// Always save the LR register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(LR));
+ // Give d14 and d15 as scratch registers to VIXL.
+ // They are removed from the register allocator in `SetupBlockedRegisters()`.
+ // TODO(VIXL): We need two scratch D registers for `EmitSwap` when swapping two double stack
+ // slots. If that is sufficiently rare, and we have pressure on FP registers, we could instead
+ // spill in `EmitSwap`. But if we actually are guaranteed to have 32 D registers, we could give
+ // d30 and d31 to VIXL to avoid removing registers from the allocator. If that is the case, we may
+ // also want to investigate giving those 14 other D registers to the allocator.
+ GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d14);
+ GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15);
}
#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->
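
For intuition, the hand-off above can be modeled in a few lines of stand-alone C++ (a toy sketch, not VIXL's implementation; the pool-picking order is an assumption): the code generator donates d14/d15 to the assembler's scratch pool, and scoped users such as `EmitSwap` later borrow both at once via `UseScratchRegisterScope`/`AcquireD()`.

    #include <bitset>
    #include <cassert>

    // Toy model of the VIXL scratch pool: Combine() donates a D register,
    // Acquire() borrows the lowest-numbered one currently available.
    class ScratchPool {
     public:
      void Combine(int dreg) { available_.set(dreg); }
      int Acquire() {
        for (int i = 0; i < 32; ++i) {
          if (available_.test(i)) { available_.reset(i); return i; }
        }
        assert(false && "out of scratch D registers");
        return -1;
      }
      void Release(int dreg) { available_.set(dreg); }
     private:
      std::bitset<32> available_;
    };

    int main() {
      ScratchPool pool;
      pool.Combine(14);        // GetScratchVRegisterList()->Combine(d14)
      pool.Combine(15);        // GetScratchVRegisterList()->Combine(d15)
      int a = pool.Acquire();  // a double-stack-slot swap needs
      int b = pool.Acquire();  // two D temps simultaneously
      assert(a == 14 && b == 15);
      pool.Release(b);         // UseScratchRegisterScope releases on scope exit
      pool.Release(a);
    }
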
@@ -509,6 +519,13 @@ void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
// Reserve temp register.
blocked_core_registers_[IP] = true;
+ // Registers s28-s31 (d14-d15) are left to VIXL as scratch registers.
+ // (They are given to the `MacroAssembler` in `CodeGeneratorARMVIXL::CodeGeneratorARMVIXL`.)
+ blocked_fpu_registers_[28] = true;
+ blocked_fpu_registers_[29] = true;
+ blocked_fpu_registers_[30] = true;
+ blocked_fpu_registers_[31] = true;
+
if (GetGraph()->IsDebuggable()) {
// Stubs do not save callee-save floating point registers. If the graph
// is debuggable, we need to deal with these registers differently. For
@@ -1331,6 +1348,26 @@ void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant
// Will be generated at use site.
}
+void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
+ // Will be generated at use site.
+}
+
void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
@@ -2161,6 +2198,92 @@ void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) {
}
}
+void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
+ switch (compare->InputAt(0)->GetType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ // Output overlaps because it is written before doing the low comparison.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
+ LocationSummary* locations = compare->GetLocations();
+ vixl32::Register out = OutputRegister(compare);
+ Location left = locations->InAt(0);
+ Location right = locations->InAt(1);
+
+ vixl32::Label less, greater, done;
+ Primitive::Type type = compare->InputAt(0)->GetType();
+ vixl32::Condition less_cond = vixl32::Condition(kNone);
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt: {
+ // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags.
+ __ Mov(out, 0);
+ __ Cmp(RegisterFrom(left), RegisterFrom(right)); // Signed compare.
+ less_cond = lt;
+ break;
+ }
+ case Primitive::kPrimLong: {
+ __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare.
+ __ B(lt, &less);
+ __ B(gt, &greater);
+ // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
+ __ Mov(out, 0);
+ __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare.
+ less_cond = lo;
+ break;
+ }
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ __ Mov(out, 0);
+ GenerateVcmp(compare);
+ // To branch on the FP compare result, we transfer FPSCR to APSR (encoded as PC in VMRS).
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected compare type " << type;
+ UNREACHABLE();
+ }
+
+ __ B(eq, &done);
+ __ B(less_cond, &less);
+
+ __ Bind(&greater);
+ __ Mov(out, 1);
+ __ B(&done);
+
+ __ Bind(&less);
+ __ Mov(out, -1);
+
+ __ Bind(&done);
+}
+
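
The sequence above materializes HCompare's -1/0/+1 result; the subtle part is the long case, where the high words are compared signed and, only when they are equal, the low words are compared unsigned. A stand-alone C++ rendition of that decision procedure (a sketch mirroring the emitted branches, not the generated code itself):

    #include <cassert>
    #include <cstdint>

    // Three-way compare for int64 using only 32-bit comparisons, mirroring
    // the branch structure emitted in VisitCompare above.
    int32_t CompareLong(int64_t a, int64_t b) {
      int32_t a_hi = static_cast<int32_t>(a >> 32);
      int32_t b_hi = static_cast<int32_t>(b >> 32);
      if (a_hi < b_hi) return -1;                 // B(lt, &less)
      if (a_hi > b_hi) return 1;                  // B(gt, &greater)
      uint32_t a_lo = static_cast<uint32_t>(a);   // low words carry no sign:
      uint32_t b_lo = static_cast<uint32_t>(b);   // compare them unsigned (lo)
      if (a_lo == b_lo) return 0;                 // B(eq, &done) with out == 0
      return (a_lo < b_lo) ? -1 : 1;
    }

    int main() {
      assert(CompareLong(-1, 0) == -1);            // high words differ
      assert(CompareLong(0x100000000LL, 1) == 1);  // high words differ
      assert(CompareLong(5, 5) == 0);
      assert(CompareLong(3, 0xFFFFFFFFLL) == -1);  // same high word, unsigned low
    }
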
void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -2648,6 +2771,16 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
}
}
+Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) {
+ DCHECK(Primitive::IsFloatingPointType(input->GetType())) << input->GetType();
+ if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
+ (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
+ return Location::ConstantLocation(input->AsConstant());
+ } else {
+ return Location::RequiresFpuRegister();
+ }
+}
+
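
The helper keeps a floating-point constant out of a register only when it is an arithmetic zero, which (assuming `IsArithmeticZero` in nodes.h classifies via `std::fpclassify`) covers both +0.0 and -0.0; that is exactly the case `vcmp` can handle with its immediate-zero form, since IEEE comparison treats the signed zeros as equal. A small stand-alone check of that property:

    #include <cassert>
    #include <cmath>

    // "Arithmetic zero" in the sense used above: the value compares equal
    // to 0.0. Both signed zeros qualify; small nonzero values do not.
    bool IsArithmeticZero(double value) { return std::fpclassify(value) == FP_ZERO; }

    int main() {
      assert(IsArithmeticZero(0.0));
      assert(IsArithmeticZero(-0.0));     // vcmp #0.0 treats -0.0 == +0.0
      assert(!IsArithmeticZero(1e-300));  // small but nonzero
      assert(-0.0 == 0.0);                // IEEE: signed zeros compare equal
    }
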
void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
const FieldInfo& field_info) {
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
@@ -2989,9 +3122,27 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
}
} else if (source.IsFpuRegister()) {
- TODO_VIXL32(FATAL);
+ if (destination.IsRegister()) {
+ TODO_VIXL32(FATAL);
+ } else if (destination.IsFpuRegister()) {
+ __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
+ } else {
+ DCHECK(destination.IsStackSlot());
+ GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
+ }
} else if (source.IsDoubleStackSlot()) {
- TODO_VIXL32(FATAL);
+ if (destination.IsDoubleStackSlot()) {
+ vixl32::DRegister temp = temps.AcquireD();
+ GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex());
+ GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex());
+ } else if (destination.IsRegisterPair()) {
+ DCHECK(ExpectedPairLayout(destination));
+ GetAssembler()->LoadFromOffset(
+ kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex());
+ } else {
+ DCHECK(destination.IsFpuRegisterPair()) << destination;
+ GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex());
+ }
} else if (source.IsRegisterPair()) {
if (destination.IsRegisterPair()) {
__ Mov(LowRegisterFrom(destination), LowRegisterFrom(source));
@@ -3009,7 +3160,14 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
destination.GetStackIndex());
}
} else if (source.IsFpuRegisterPair()) {
- TODO_VIXL32(FATAL);
+ if (destination.IsRegisterPair()) {
+ TODO_VIXL32(FATAL);
+ } else if (destination.IsFpuRegisterPair()) {
+ __ Vmov(DRegisterFrom(destination), DRegisterFrom(source));
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex());
+ }
} else {
DCHECK(source.IsConstant()) << source;
HConstant* constant = source.GetConstant();
@@ -3070,18 +3228,77 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
}
}
-void ParallelMoveResolverARMVIXL::Exchange(Register reg ATTRIBUTE_UNUSED,
- int mem ATTRIBUTE_UNUSED) {
- TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
+ UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ __ Mov(temp, reg);
+ GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem);
+ GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
}
-void ParallelMoveResolverARMVIXL::Exchange(int mem1 ATTRIBUTE_UNUSED,
- int mem2 ATTRIBUTE_UNUSED) {
- TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
+ // TODO(VIXL32): Double check the performance of this implementation.
+ UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ vixl32::SRegister temp_s = temps.AcquireS();
+
+ __ Ldr(temp, MemOperand(sp, mem1));
+ __ Vldr(temp_s, MemOperand(sp, mem2));
+ __ Str(temp, MemOperand(sp, mem2));
+ __ Vstr(temp_s, MemOperand(sp, mem1));
}
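
The stack-slot/stack-slot exchange above needs no spill because it pairs one core temp with one S temp and loads both slots before storing either. The same load/load/store/store schedule simulated on an array standing in for the stack (illustrative only):

    #include <cassert>
    #include <cstdint>

    // Simulate Exchange(mem1, mem2): read both slots into two "temps" first,
    // then write them back crosswise, so neither value is clobbered.
    void ExchangeSlots(uint32_t* stack, int mem1, int mem2) {
      uint32_t temp = stack[mem1];    // Ldr  temp,   [sp, #mem1]
      uint32_t temp_s = stack[mem2];  // Vldr temp_s, [sp, #mem2]
      stack[mem2] = temp;             // Str  temp,   [sp, #mem2]
      stack[mem1] = temp_s;           // Vstr temp_s, [sp, #mem1]
    }

    int main() {
      uint32_t stack[4] = {10, 20, 30, 40};
      ExchangeSlots(stack, 0, 3);
      assert(stack[0] == 40 && stack[3] == 10);
    }
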
-void ParallelMoveResolverARMVIXL::EmitSwap(size_t index ATTRIBUTE_UNUSED) {
- TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
+ MoveOperands* move = moves_[index];
+ Location source = move->GetSource();
+ Location destination = move->GetDestination();
+ UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+
+ if (source.IsRegister() && destination.IsRegister()) {
+ vixl32::Register temp = temps.Acquire();
+ DCHECK(!RegisterFrom(source).Is(temp));
+ DCHECK(!RegisterFrom(destination).Is(temp));
+ __ Mov(temp, RegisterFrom(destination));
+ __ Mov(RegisterFrom(destination), RegisterFrom(source));
+ __ Mov(RegisterFrom(source), temp);
+ } else if (source.IsRegister() && destination.IsStackSlot()) {
+ Exchange(RegisterFrom(source), destination.GetStackIndex());
+ } else if (source.IsStackSlot() && destination.IsRegister()) {
+ Exchange(RegisterFrom(destination), source.GetStackIndex());
+ } else if (source.IsStackSlot() && destination.IsStackSlot()) {
+ TODO_VIXL32(FATAL);
+ } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
+ TODO_VIXL32(FATAL);
+ } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
+ vixl32::DRegister temp = temps.AcquireD();
+ __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source));
+ __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination));
+ __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination));
+ __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp);
+ } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
+ vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination);
+ int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex();
+ DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
+ vixl32::DRegister temp = temps.AcquireD();
+ __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1));
+ GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
+ GetAssembler()->StoreDToOffset(temp, sp, mem);
+ } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
+ TODO_VIXL32(FATAL);
+ } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
+ TODO_VIXL32(FATAL);
+ } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
+ TODO_VIXL32(FATAL);
+ } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
+ vixl32::DRegister temp1 = temps.AcquireD();
+ vixl32::DRegister temp2 = temps.AcquireD();
+ __ Vldr(temp1, MemOperand(sp, source.GetStackIndex()));
+ __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex()));
+ __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex()));
+ __ Vstr(temp2, MemOperand(sp, source.GetStackIndex()));
+ } else {
+ LOG(FATAL) << "Unimplemented" << source << " <-> " << destination;
+ }
}
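
One of the cases above deserves a closer look: to swap a core register pair with a double stack slot, the pair is parked in a single D temp (`Vmov temp, low, high`), the slot is loaded into the pair with one word-pair load, and the D temp is stored back over the slot. A stand-alone model of that data movement (not the emitted code):

    #include <cassert>
    #include <cstdint>

    // Swap a 32-bit register pair (low, high) with a 64-bit stack slot using
    // one 64-bit temp, mirroring Vmov / LoadFromOffset / StoreDToOffset above.
    void SwapPairWithSlot(uint32_t& low, uint32_t& high, uint64_t& slot) {
      uint64_t temp = (static_cast<uint64_t>(high) << 32) | low;  // Vmov temp, low, high
      low = static_cast<uint32_t>(slot);                          // ldrd low, high, [sp, #mem]
      high = static_cast<uint32_t>(slot >> 32);
      slot = temp;                                                // StoreDToOffset(temp, sp, mem)
    }

    int main() {
      uint32_t low = 0x11111111, high = 0x22222222;
      uint64_t slot = 0x4444444433333333ULL;
      SwapPairWithSlot(low, high, slot);
      assert(low == 0x33333333 && high == 0x44444444);
      assert(slot == 0x2222222211111111ULL);
    }
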
void ParallelMoveResolverARMVIXL::SpillScratch(int reg ATTRIBUTE_UNUSED) {
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index c749f8620a..1cd6184dd4 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -110,11 +110,14 @@ class LoadClassSlowPathARMVIXL;
M(BelowOrEqual) \
M(ClearException) \
M(ClinitCheck) \
+ M(Compare) \
M(CurrentMethod) \
M(Div) \
M(DivZeroCheck) \
+ M(DoubleConstant) \
M(Equal) \
M(Exit) \
+ M(FloatConstant) \
M(Goto) \
M(GreaterThan) \
M(GreaterThanOrEqual) \
@@ -161,10 +164,7 @@ class LoadClassSlowPathARMVIXL;
M(BoundType) \
M(CheckCast) \
M(ClassTableGet) \
- M(Compare) \
M(Deoptimize) \
- M(DoubleConstant) \
- M(FloatConstant) \
M(InstanceOf) \
M(InvokeInterface) \
M(InvokeUnresolved) \
@@ -246,7 +246,7 @@ class ParallelMoveResolverARMVIXL : public ParallelMoveResolverWithSwap {
ArmVIXLAssembler* GetAssembler() const;
private:
- void Exchange(Register reg, int mem);
+ void Exchange(vixl32::Register reg, int mem);
void Exchange(int mem1, int mem2);
CodeGeneratorARMVIXL* const codegen_;
@@ -280,6 +280,8 @@ class LocationsBuilderARMVIXL : public HGraphVisitor {
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ Location ArithmeticZeroOrFpuRegister(HInstruction* input);
+
CodeGeneratorARMVIXL* const codegen_;
InvokeDexCallingConventionVisitorARM parameter_visitor_;
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 4bea3c362b..f2602fbf8c 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -23,12 +23,12 @@ namespace art {
* Since graph traversal may enter an SCC at any position, an initial representation may be rotated,
* along dependences, viz. any of (a, b, c, d), (d, a, b, c), (c, d, a, b), (b, c, d, a), assuming
* a chain of dependences (mutually independent items may occur in arbitrary order). For proper
- * classification, the lexicographically first entry-phi is rotated to the front.
+ * classification, the lexicographically first loop-phi is rotated to the front.
*/
static void RotateEntryPhiFirst(HLoopInformation* loop,
ArenaVector<HInstruction*>* scc,
ArenaVector<HInstruction*>* new_scc) {
- // Find very first entry-phi.
+ // Find very first loop-phi.
const HInstructionList& phis = loop->GetHeader()->GetPhis();
HInstruction* phi = nullptr;
size_t phi_pos = -1;
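
Concretely, if traversal entered the cycle at `c` and produced (c, d, a, b) while `a` is the first loop-phi in the header's phi order, the helper rotates the vector to (a, b, c, d). The same normalization with std::rotate (stand-alone sketch; the real code copies into a second ArenaVector instead):

    #include <algorithm>
    #include <cassert>
    #include <string>
    #include <vector>

    int main() {
      // A rotated SCC as graph traversal might deliver it; "a" is the loop-phi.
      std::vector<std::string> scc = {"c", "d", "a", "b"};
      auto phi = std::find(scc.begin(), scc.end(), "a");
      std::rotate(scc.begin(), phi, scc.end());  // bring the loop-phi to the front
      assert((scc == std::vector<std::string>{"a", "b", "c", "d"}));
    }
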
@@ -41,7 +41,7 @@ static void RotateEntryPhiFirst(HLoopInformation* loop,
}
}
- // If found, bring that entry-phi to front.
+ // If found, bring that loop-phi to front.
if (phi != nullptr) {
new_scc->clear();
for (size_t i = 0; i < size; i++) {
@@ -94,7 +94,9 @@ HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph)
graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
type_(Primitive::kPrimVoid),
induction_(std::less<HLoopInformation*>(),
- graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) {
+ graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
+ cycles_(std::less<HPhi*>(),
+ graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) {
}
void HInductionVarAnalysis::Run() {
@@ -244,13 +246,13 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) {
const size_t size = scc_.size();
DCHECK_GE(size, 1u);
- // Rotate proper entry-phi to front.
+ // Rotate proper loop-phi to front.
if (size > 1) {
ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis));
RotateEntryPhiFirst(loop, &scc_, &other);
}
- // Analyze from entry-phi onwards.
+ // Analyze from loop-phi onwards.
HInstruction* phi = scc_[0];
if (!phi->IsLoopHeaderPhi()) {
return;
@@ -262,6 +264,9 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) {
return;
}
+ // Store interesting cycle.
+ AssignCycle(phi->AsPhi());
+
// Singleton is wrap-around induction if all internal links have the same meaning.
if (size == 1) {
InductionInfo* update = TransferPhi(loop, phi, /* input_index */ 1);
@@ -365,6 +370,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferAddSub(Indu
// can be combined with an invariant to yield a similar result. Even two linear inputs can
// be combined. All other combinations fail, however.
if (a != nullptr && b != nullptr) {
+ type_ = Narrowest(type_, Narrowest(a->type, b->type));
if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
return CreateInvariantOp(op, a, b);
} else if (a->induction_class == kLinear && b->induction_class == kLinear) {
@@ -401,6 +407,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferMul(Inducti
// can be multiplied with an invariant to yield a similar but multiplied result.
// Two non-invariant inputs cannot be multiplied, however.
if (a != nullptr && b != nullptr) {
+ type_ = Narrowest(type_, Narrowest(a->type, b->type));
if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
return CreateInvariantOp(kMul, a, b);
} else if (a->induction_class == kInvariant) {
@@ -441,6 +448,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferNeg(Inducti
// Transfer over a unary negation: an invariant, linear, wrap-around, or periodic input
// yields a similar but negated induction as result.
if (a != nullptr) {
+ type_ = Narrowest(type_, a->type);
if (a->induction_class == kInvariant) {
return CreateInvariantOp(kNeg, nullptr, a);
}
@@ -940,6 +948,23 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv
return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, b->type);
}
+
+void HInductionVarAnalysis::AssignCycle(HPhi* phi) {
+ ArenaSet<HInstruction*>* set = &cycles_.Put(phi, ArenaSet<HInstruction*>(
+ graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)))->second;
+ for (HInstruction* i : scc_) {
+ set->insert(i);
+ }
+}
+
+ArenaSet<HInstruction*>* HInductionVarAnalysis::LookupCycle(HPhi* phi) {
+ auto it = cycles_.find(phi);
+ if (it != cycles_.end()) {
+ return &it->second;
+ }
+ return nullptr;
+}
+
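
`AssignCycle`/`LookupCycle` amount to a phi-keyed side table of SCC members: `Put` creates the entry, whose set is then populated in place (avoiding a copy of the whole set), and lookup returns a pointer or null. The shape of that API in stand-alone C++, with std::map/std::set standing in for the arena containers:

    #include <cassert>
    #include <map>
    #include <set>
    #include <string>

    using Cycles = std::map<std::string, std::set<std::string>>;

    // Like AssignCycle: create the entry first, then fill it in place.
    void AssignCycle(Cycles& cycles, const std::string& phi,
                     const std::set<std::string>& scc) {
      std::set<std::string>& set = cycles[phi];
      set.insert(scc.begin(), scc.end());
    }

    // Like LookupCycle: pointer to the stored set, or nullptr if absent.
    const std::set<std::string>* LookupCycle(const Cycles& cycles,
                                             const std::string& phi) {
      auto it = cycles.find(phi);
      return it != cycles.end() ? &it->second : nullptr;
    }

    int main() {
      Cycles cycles;
      AssignCycle(cycles, "phi", {"phi", "add"});
      assert(LookupCycle(cycles, "phi") != nullptr);
      assert(LookupCycle(cycles, "other") == nullptr);
    }
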
bool HInductionVarAnalysis::IsExact(InductionInfo* info, int64_t* value) {
return InductionVarRange(this).IsConstant(info, InductionVarRange::kExact, value);
}
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index d19078248c..70271799d2 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -214,6 +214,8 @@ class HInductionVarAnalysis : public HOptimization {
InductionInfo* LookupInfo(HLoopInformation* loop, HInstruction* instruction);
InductionInfo* CreateConstant(int64_t value, Primitive::Type type);
InductionInfo* CreateSimplifiedInvariant(InductionOp op, InductionInfo* a, InductionInfo* b);
+ void AssignCycle(HPhi* phi);
+ ArenaSet<HInstruction*>* LookupCycle(HPhi* phi);
// Constants.
bool IsExact(InductionInfo* info, /*out*/ int64_t* value);
@@ -240,6 +242,11 @@ class HInductionVarAnalysis : public HOptimization {
*/
ArenaSafeMap<HLoopInformation*, ArenaSafeMap<HInstruction*, InductionInfo*>> induction_;
+ /**
+ * Preserves induction cycle information for each loop-phi.
+ */
+ ArenaSafeMap<HPhi*, ArenaSet<HInstruction*>> cycles_;
+
friend class InductionVarAnalysisTest;
friend class InductionVarRange;
friend class InductionVarRangeTest;
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 7599c8fd79..031f1d74a8 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -740,6 +740,31 @@ TEST_F(InductionVarAnalysisTest, ByteInductionIntLoopControl) {
EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", GetTripCount(0).c_str());
}
+TEST_F(InductionVarAnalysisTest, ByteInductionDerivedIntLoopControl) {
+ // Setup:
+ // for (int i = 0; i < 100; i++) {
+ // k = (byte) i;
+ // a[k] = 0;
+ // k = k + 1
+ // a[k] = 0;
+ // }
+ BuildLoopNest(1);
+ HInstruction* conv = InsertInstruction(
+ new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0);
+ HInstruction* store1 = InsertArrayStore(conv, 0);
+ HInstruction* add = InsertInstruction(
+ new (&allocator_) HAdd(Primitive::kPrimInt, conv, constant1_), 0);
+ HInstruction* store2 = InsertArrayStore(add, 0);
+
+ PerformInductionVarAnalysis();
+
+ // Byte induction (k) is "transferred" over the conversion into the addition (k + 1).
+ // This means only values within byte range can be trusted (even though the
+ // addition itself may, of course, step outside that range).
+ EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str());
+ EXPECT_STREQ("((1) * i + (1)):PrimByte", GetInductionInfo(store2->InputAt(1), 0).c_str());
+}
+
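
The caveat in the comment is easy to see numerically: while the loop value fits in a byte the conversion is the identity and the derived induction (k + 1) tracks i exactly, but once an int value leaves [-128, 127] the byte-typed view wraps. A stand-alone illustration, with int8_t playing the role of the byte conversion:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Within byte range the converted induction tracks i exactly ...
      for (int i = 0; i < 100; ++i) {
        int8_t k = static_cast<int8_t>(i);  // HTypeConversion to kPrimByte
        assert(k == i);                     // ((1) * i + (0)):PrimByte
        assert(k + 1 == i + 1);             // ((1) * i + (1)):PrimByte
      }
      // ... but outside it the byte view wraps, so only in-range values are trusted.
      int8_t k = static_cast<int8_t>(128);
      assert(k == -128);
    }
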
TEST_F(InductionVarAnalysisTest, ByteLoopControl1) {
// Setup:
// for (byte i = -128; i < 127; i++) { // just fits!
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 2f70046a27..034cf32b2d 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -136,10 +136,20 @@ class InductionVarRange {
*/
void ReVisit(HLoopInformation* loop) {
induction_analysis_->induction_.erase(loop);
+ for (HInstructionIterator it(loop->GetHeader()->GetPhis()); !it.Done(); it.Advance()) {
+ induction_analysis_->cycles_.erase(it.Current()->AsPhi());
+ }
induction_analysis_->VisitLoop(loop);
}
/**
+ * Looks up the interesting cycle associated with a loop-phi.
+ */
+ ArenaSet<HInstruction*>* LookupCycle(HPhi* phi) const {
+ return induction_analysis_->LookupCycle(phi);
+ }
+
+ /**
* Checks if header logic of a loop terminates.
*/
bool IsFinite(HLoopInformation* loop) const;
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index b88e73b979..51be1d1e91 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -20,82 +20,6 @@
namespace art {
-// Detects a potential induction cycle. Note that the actual induction
-// information is queried later if its last value is really needed.
-static bool IsPhiInduction(HPhi* phi, ArenaSet<HInstruction*>* iset) {
- DCHECK(iset->empty());
- HInputsRef inputs = phi->GetInputs();
- if (inputs.size() == 2) {
- HLoopInformation* loop_info = phi->GetBlock()->GetLoopInformation();
- HInstruction* op = inputs[1];
- if (op->GetBlock()->GetLoopInformation() == loop_info) {
- // Chase a simple chain back to phi.
- while (!op->IsPhi()) {
- // Binary operation with single use in same loop.
- if (!op->IsBinaryOperation() || !op->GetUses().HasExactlyOneElement()) {
- return false;
- }
- // Chase back either through left or right operand.
- iset->insert(op);
- HInstruction* a = op->InputAt(0);
- HInstruction* b = op->InputAt(1);
- if (a->GetBlock()->GetLoopInformation() == loop_info && b != phi) {
- op = a;
- } else if (b->GetBlock()->GetLoopInformation() == loop_info) {
- op = b;
- } else {
- return false;
- }
- }
- // Closed the cycle?
- if (op == phi) {
- iset->insert(phi);
- return true;
- }
- }
- }
- return false;
-}
-
-// Find: phi: Phi(init, addsub)
-// s: SuspendCheck
-// c: Condition(phi, bound)
-// i: If(c)
-// TODO: Find a less pattern matching approach?
-static bool IsEmptyHeader(HBasicBlock* block, ArenaSet<HInstruction*>* iset) {
- DCHECK(iset->empty());
- HInstruction* phi = block->GetFirstPhi();
- if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi(), iset)) {
- HInstruction* s = block->GetFirstInstruction();
- if (s != nullptr && s->IsSuspendCheck()) {
- HInstruction* c = s->GetNext();
- if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) {
- HInstruction* i = c->GetNext();
- if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
- iset->insert(c);
- iset->insert(s);
- return true;
- }
- }
- }
- }
- return false;
-}
-
-// Does the loop-body consist of induction cycle and direct control flow only?
-static bool IsEmptyBody(HBasicBlock* block, ArenaSet<HInstruction*>* iset) {
- if (block->GetFirstPhi() == nullptr) {
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- HInstruction* instruction = it.Current();
- if (!instruction->IsGoto() && iset->find(instruction) == iset->end()) {
- return false;
- }
- }
- return true;
- }
- return false;
-}
-
// Remove the instruction from the graph. A bit more elaborate than the usual
// instruction removal, since there may be a cycle in the use structure.
static void RemoveFromCycle(HInstruction* instruction) {
@@ -242,7 +166,7 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) {
HPhi* phi = it.Current()->AsPhi();
iset_->clear();
int32_t use_count = 0;
- if (IsPhiInduction(phi, iset_) &&
+ if (IsPhiInduction(phi) &&
IsOnlyUsedAfterLoop(node->loop_info, phi, &use_count) &&
TryReplaceWithLastValue(phi, use_count, preheader)) {
for (HInstruction* i : *iset_) {
@@ -256,15 +180,14 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) {
void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
- // Remove instructions that are dead, usually resulting from eliminating induction cycles.
+ // Remove instructions that are dead.
for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) {
HInstruction* instruction = i.Current();
if (instruction->IsDeadAndRemovable()) {
block->RemoveInstruction(instruction);
}
}
- // Remove trivial control flow blocks from the loop-body, again usually resulting
- // from eliminating induction cycles.
+ // Remove trivial control flow blocks from the loop-body.
if (block->GetPredecessors().size() == 1 &&
block->GetSuccessors().size() == 1 &&
block->GetFirstInstruction()->IsGoto()) {
@@ -314,8 +237,8 @@ void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) {
// subsequent index uses, if any, with the last value and remove the loop.
iset_->clear();
int32_t use_count = 0;
- if (IsEmptyHeader(header, iset_) &&
- IsEmptyBody(body, iset_) &&
+ if (IsEmptyHeader(header) &&
+ IsEmptyBody(body) &&
IsOnlyUsedAfterLoop(node->loop_info, header->GetFirstPhi(), &use_count) &&
TryReplaceWithLastValue(header->GetFirstPhi(), use_count, preheader)) {
body->DisconnectAndDelete();
@@ -333,6 +256,68 @@ void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) {
}
}
+bool HLoopOptimization::IsPhiInduction(HPhi* phi) {
+ ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
+ if (set != nullptr) {
+ for (HInstruction* i : *set) {
+ // Check that, other than the phi, each instruction is removable, with all uses contained in the cycle.
+ // TODO: investigate what cases are no longer in the graph.
+ if (i != phi) {
+ if (!i->IsInBlock() || !i->IsRemovable()) {
+ return false;
+ }
+ for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
+ if (set->find(use.GetUser()) == set->end()) {
+ return false;
+ }
+ }
+ }
+ }
+ DCHECK(iset_->empty());
+ iset_->insert(set->begin(), set->end()); // copy
+ return true;
+ }
+ return false;
+}
+
+// Find: phi: Phi(init, addsub)
+// s: SuspendCheck
+// c: Condition(phi, bound)
+// i: If(c)
+// TODO: Find a less pattern matching approach?
+bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) {
+ DCHECK(iset_->empty());
+ HInstruction* phi = block->GetFirstPhi();
+ if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi())) {
+ HInstruction* s = block->GetFirstInstruction();
+ if (s != nullptr && s->IsSuspendCheck()) {
+ HInstruction* c = s->GetNext();
+ if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) {
+ HInstruction* i = c->GetNext();
+ if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
+ iset_->insert(c);
+ iset_->insert(s);
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) {
+ if (block->GetFirstPhi() == nullptr) {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return false;
+}
+
bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
HInstruction* instruction,
/*out*/ int32_t* use_count) {
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 4113357035..e18d17531e 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -64,6 +64,10 @@ class HLoopOptimization : public HOptimization {
void SimplifyBlocks(LoopNode* node);
void RemoveIfEmptyInnerLoop(LoopNode* node);
+ bool IsPhiInduction(HPhi* phi);
+ bool IsEmptyHeader(HBasicBlock* block);
+ bool IsEmptyBody(HBasicBlock* block);
+
bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
HInstruction* instruction,
/*out*/ int32_t* use_count);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 710f709c89..6a45149509 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1945,7 +1945,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
return !HasEnvironmentUses() && GetUses().HasExactlyOneElement();
}
- bool IsDeadAndRemovable() const {
+ bool IsRemovable() const {
return
!HasSideEffects() &&
!CanThrow() &&
@@ -1953,11 +1953,14 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
!IsControlFlow() &&
!IsNativeDebugInfo() &&
!IsParameterValue() &&
- !HasUses() &&
// If we added an explicit barrier then we should keep it.
!IsMemoryBarrier();
}
+ bool IsDeadAndRemovable() const {
+ return IsRemovable() && !HasUses();
+ }
+
// Does this instruction strictly dominate `other_instruction`?
// Returns false if this instruction and `other_instruction` are the same.
// Aborts if this instruction and `other_instruction` are both phis.
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index bacf9940ca..013e110b87 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -52,7 +52,7 @@ class OptimizingCFITest : public CFITest {
void SetUpFrame(InstructionSet isa) {
// Setup simple context.
std::string error;
- isa_features_.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
+ isa_features_ = InstructionSetFeatures::FromVariant(isa, "default", &error);
graph_ = CreateGraph(&allocator_);
// Generate simple frame with some spills.
code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_);