Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc      |  47
-rw-r--r--  compiler/optimizing/code_generator_x86.cc        | 158
-rw-r--r--  compiler/optimizing/code_generator_x86.h         |   4
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc     |   1
-rw-r--r--  compiler/optimizing/dead_code_elimination.h      |   5
-rw-r--r--  compiler/optimizing/graph_checker.cc             |  11
-rw-r--r--  compiler/optimizing/inliner.cc                   |   2
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc    |   4
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc            |   2
-rw-r--r--  compiler/optimizing/nodes.cc                     |   8
-rw-r--r--  compiler/optimizing/nodes.h                      |   2
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc       |   8
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h  |   6
-rw-r--r--  compiler/optimizing/register_allocator.cc        |  20
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h      |  20
15 files changed, 176 insertions(+), 122 deletions(-)
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 76e2ec6f42..23ba339a98 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1353,16 +1353,16 @@ void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction)
 }
 
 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
-  Primitive::Type value_type = instruction->GetComponentType();
-  bool is_object = value_type == Primitive::kPrimNot;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, is_object ? LocationSummary::kCall : LocationSummary::kNoCall);
-  if (is_object) {
+  if (instruction->NeedsTypeCheck()) {
+    LocationSummary* locations =
+        new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
     locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
     locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   } else {
+    LocationSummary* locations =
+        new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
     locations->SetInAt(0, Location::RequiresRegister());
     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
     if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
@@ -1375,33 +1375,42 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
 
 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
   Primitive::Type value_type = instruction->GetComponentType();
-  if (value_type == Primitive::kPrimNot) {
+  LocationSummary* locations = instruction->GetLocations();
+  bool needs_runtime_call = locations->WillCall();
+
+  if (needs_runtime_call) {
     codegen_->InvokeRuntime(
         QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc(), nullptr);
     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
   } else {
-    LocationSummary* locations = instruction->GetLocations();
     Register obj = InputRegisterAt(instruction, 0);
     CPURegister value = InputCPURegisterAt(instruction, 2);
    Location index = locations->InAt(1);
     size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
     MemOperand destination = HeapOperand(obj);
     MacroAssembler* masm = GetVIXLAssembler();
-    UseScratchRegisterScope temps(masm);
     BlockPoolsScope block_pools(masm);
+    {
+      // We use a block to end the scratch scope before the write barrier, thus
+      // freeing the temporary registers so they can be used in `MarkGCCard`.
+      UseScratchRegisterScope temps(masm);
+
+      if (index.IsConstant()) {
+        offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
+        destination = HeapOperand(obj, offset);
+      } else {
+        Register temp = temps.AcquireSameSizeAs(obj);
+        Register index_reg = InputRegisterAt(instruction, 1);
+        __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(value_type)));
+        destination = HeapOperand(temp, offset);
+      }
-    if (index.IsConstant()) {
-      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
-      destination = HeapOperand(obj, offset);
-    } else {
-      Register temp = temps.AcquireSameSizeAs(obj);
-      Register index_reg = InputRegisterAt(instruction, 1);
-      __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(value_type)));
-      destination = HeapOperand(temp, offset);
+      codegen_->Store(value_type, value, destination);
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
+    if (CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue())) {
+      codegen_->MarkGCCard(obj, value.W());
     }
-
-    codegen_->Store(value_type, value, destination);
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
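The inner block introduced above is doing real work: `UseScratchRegisterScope` is an RAII guard over VIXL's scratch registers, so closing the block hands the temporaries back before `MarkGCCard` needs its own. A stand-alone sketch of the pattern (the `ScratchPool`/`ScratchScope` types below are hypothetical stand-ins, not VIXL's API):

    #include <cassert>

    // Hypothetical stand-in for VIXL's pool of scratch registers.
    struct ScratchPool {
      int available = 2;
    };

    // RAII guard mirroring vixl::UseScratchRegisterScope: everything acquired
    // through the scope is handed back when the scope is destroyed.
    class ScratchScope {
     public:
      explicit ScratchScope(ScratchPool* pool) : pool_(pool) {}
      ~ScratchScope() { pool_->available += acquired_; }
      void Acquire() {
        assert(pool_->available > 0 && "no scratch register left");
        --pool_->available;
        ++acquired_;
      }

     private:
      ScratchPool* pool_;
      int acquired_ = 0;
    };

    // Shape of the new VisitArraySet: the inner block bounds the borrow, so
    // the write barrier can grab its own temporaries afterwards.
    void EmitArraySet(ScratchPool* pool) {
      {
        ScratchScope temps(pool);
        temps.Acquire();  // e.g. the register holding the computed address
        // ... emit the store and the implicit null check ...
      }  // scratch registers are released here
      ScratchScope barrier_temps(pool);  // MarkGCCard's own temporaries
      barrier_temps.Acquire();
    }

    int main() {
      ScratchPool pool;
      EmitArraySet(&pool);
      return 0;
    }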
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 86e84ac66a..3dcfca6a0c 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1556,10 +1556,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
 
       case Primitive::kPrimLong:
         // Processing a Dex `long-to-float' instruction.
-        locations->SetInAt(0, Location::RequiresRegister());
-        locations->SetOut(Location::RequiresFpuRegister());
-        locations->AddTemp(Location::RequiresFpuRegister());
-        locations->AddTemp(Location::RequiresFpuRegister());
+        locations->SetInAt(0, Location::Any());
+        locations->SetOut(Location::Any());
         break;
 
       case Primitive::kPrimDouble:
@@ -1589,10 +1587,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
 
       case Primitive::kPrimLong:
         // Processing a Dex `long-to-double' instruction.
-        locations->SetInAt(0, Location::RequiresRegister());
-        locations->SetOut(Location::RequiresFpuRegister());
-        locations->AddTemp(Location::RequiresFpuRegister());
-        locations->AddTemp(Location::RequiresFpuRegister());
+        locations->SetInAt(0, Location::Any());
+        locations->SetOut(Location::Any());
         break;
 
       case Primitive::kPrimFloat:
@@ -1813,37 +1809,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
 
       case Primitive::kPrimLong: {
         // Processing a Dex `long-to-float' instruction.
-        Register low = in.AsRegisterPairLow<Register>();
-        Register high = in.AsRegisterPairHigh<Register>();
-        XmmRegister result = out.AsFpuRegister<XmmRegister>();
-        XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-        XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
-
-        // Operations use doubles for precision reasons (each 32-bit
-        // half of a long fits in the 53-bit mantissa of a double,
-        // but not in the 24-bit mantissa of a float). This is
-        // especially important for the low bits. The result is
-        // eventually converted to float.
-
-        // low = low - 2^31 (to prevent bit 31 of `low` to be
-        // interpreted as a sign bit)
-        __ subl(low, Immediate(0x80000000));
-        // temp = int-to-double(high)
-        __ cvtsi2sd(temp, high);
-        // temp = temp * 2^32
-        __ LoadLongConstant(constant, k2Pow32EncodingForDouble);
-        __ mulsd(temp, constant);
-        // result = int-to-double(low)
-        __ cvtsi2sd(result, low);
-        // result = result + 2^31 (restore the original value of `low`)
-        __ LoadLongConstant(constant, k2Pow31EncodingForDouble);
-        __ addsd(result, constant);
-        // result = result + temp
-        __ addsd(result, temp);
-        // result = double-to-float(result)
-        __ cvtsd2ss(result, result);
-        // Restore low.
-        __ addl(low, Immediate(0x80000000));
+        size_t adjustment = 0;
+
+        // Create stack space for the call to
+        // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
+        // TODO: enhance register allocator to ask for stack temporaries.
+        if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
+          adjustment = Primitive::ComponentSize(Primitive::kPrimLong);
+          __ subl(ESP, Immediate(adjustment));
+        }
+
+        // Load the value to the FP stack, using temporaries if needed.
+        PushOntoFPStack(in, 0, adjustment, false, true);
+
+        if (out.IsStackSlot()) {
+          __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
+        } else {
+          __ fstps(Address(ESP, 0));
+          Location stack_temp = Location::StackSlot(0);
+          codegen_->Move32(out, stack_temp);
+        }
+
+        // Remove the temporary stack space we allocated.
+        if (adjustment != 0) {
+          __ addl(ESP, Immediate(adjustment));
+        }
         break;
       }
 
@@ -1872,29 +1862,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
 
       case Primitive::kPrimLong: {
         // Processing a Dex `long-to-double' instruction.
-        Register low = in.AsRegisterPairLow<Register>();
-        Register high = in.AsRegisterPairHigh<Register>();
-        XmmRegister result = out.AsFpuRegister<XmmRegister>();
-        XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-        XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
-
-        // low = low - 2^31 (to prevent bit 31 of `low` to be
-        // interpreted as a sign bit)
-        __ subl(low, Immediate(0x80000000));
-        // temp = int-to-double(high)
-        __ cvtsi2sd(temp, high);
-        // temp = temp * 2^32
-        __ LoadLongConstant(constant, k2Pow32EncodingForDouble);
-        __ mulsd(temp, constant);
-        // result = int-to-double(low)
-        __ cvtsi2sd(result, low);
-        // result = result + 2^31 (restore the original value of `low`)
-        __ LoadLongConstant(constant, k2Pow31EncodingForDouble);
-        __ addsd(result, constant);
-        // result = result + temp
-        __ addsd(result, temp);
-        // Restore low.
-        __ addl(low, Immediate(0x80000000));
+        size_t adjustment = 0;
+
+        // Create stack space for the call to
+        // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
+        // TODO: enhance register allocator to ask for stack temporaries.
+        if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
+          adjustment = Primitive::ComponentSize(Primitive::kPrimLong);
+          __ subl(ESP, Immediate(adjustment));
+        }
+
+        // Load the value to the FP stack, using temporaries if needed.
+        PushOntoFPStack(in, 0, adjustment, false, true);
+
+        if (out.IsDoubleStackSlot()) {
+          __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
+        } else {
+          __ fstpl(Address(ESP, 0));
+          Location stack_temp = Location::DoubleStackSlot(0);
+          codegen_->Move64(out, stack_temp);
+        }
+
+        // Remove the temporary stack space we allocated.
+        if (adjustment != 0) {
+          __ addl(ESP, Immediate(adjustment));
+        }
         break;
       }
 
@@ -2234,24 +2226,43 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
   }
 }
 
-void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, uint32_t temp_offset,
-                                                  uint32_t stack_adjustment, bool is_float) {
+void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
+                                                  uint32_t temp_offset,
+                                                  uint32_t stack_adjustment,
+                                                  bool is_fp,
+                                                  bool is_wide) {
   if (source.IsStackSlot()) {
-    DCHECK(is_float);
-    __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
+    DCHECK(!is_wide);
+    if (is_fp) {
+      __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
+    } else {
+      __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
+    }
   } else if (source.IsDoubleStackSlot()) {
-    DCHECK(!is_float);
-    __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
+    DCHECK(is_wide);
+    if (is_fp) {
+      __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
+    } else {
+      __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
+    }
   } else {
     // Write the value to the temporary location on the stack and load to FP stack.
-    if (is_float) {
+    if (!is_wide) {
       Location stack_temp = Location::StackSlot(temp_offset);
       codegen_->Move32(stack_temp, source);
-      __ flds(Address(ESP, temp_offset));
+      if (is_fp) {
+        __ flds(Address(ESP, temp_offset));
+      } else {
+        __ filds(Address(ESP, temp_offset));
+      }
     } else {
       Location stack_temp = Location::DoubleStackSlot(temp_offset);
       codegen_->Move64(stack_temp, source);
-      __ fldl(Address(ESP, temp_offset));
+      if (is_fp) {
+        __ fldl(Address(ESP, temp_offset));
+      } else {
+        __ fildl(Address(ESP, temp_offset));
+      }
     }
   }
 }
@@ -2270,8 +2281,9 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
   __ subl(ESP, Immediate(2 * elem_size));
 
   // Load the values to the FP stack in reverse order, using temporaries if needed.
-  PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
-  PushOntoFPStack(first, 0, 2 * elem_size, is_float);
+  const bool is_wide = !is_float;
+  PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp */ true, is_wide);
+  PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide);
 
   // Loop doing FPREM until we stabilize.
   Label retry;
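The new code routes the conversion through the x87 unit: `fildl` loads the 64-bit integer into an 80-bit register (64-bit mantissa), and `fstps`/`fstpl` stores it rounded once to the destination width, replacing the retired two-temporary SSE sequence. For reference, the arithmetic the removed sequence performed can be reproduced in portable C++ (a sketch; the function name and sample values are mine, not ART's):

    #include <cstdint>
    #include <cstdio>

    // Portable re-creation of the arithmetic behind the removed SSE sequence.
    // Each 32-bit half of the long fits in the 53-bit mantissa of a double
    // (the reason the old code worked in doubles), and the result is only
    // narrowed to float at the very end.
    float LongToFloatViaDoubles(int64_t value) {
      uint32_t low = static_cast<uint32_t>(value);
      int32_t high = static_cast<int32_t>(value >> 32);
      // The old code biased `low` by 2^31 so that cvtsi2sd (a *signed* 32-bit
      // conversion) would not misread bit 31 as a sign bit, then added 2^31
      // back as a double. Unsigned wraparound makes the bias well defined;
      // the narrowing cast assumes the usual two's-complement behavior.
      int32_t biased = static_cast<int32_t>(low - 0x80000000u);
      double low_unsigned = static_cast<double>(biased) + 2147483648.0;   // + 2^31
      double result = static_cast<double>(high) * 4294967296.0            // high * 2^32
                      + low_unsigned;
      return static_cast<float>(result);
    }

    int main() {
      const int64_t samples[] = {0, -1, 123456789012345LL, INT64_MIN};
      for (int64_t v : samples) {
        // The x87 path the patch introduces rounds once from a 64-bit
        // mantissa; this double-based path can round twice.
        printf("%lld -> %.9g (direct cast: %.9g)\n",
               static_cast<long long>(v), LongToFloatViaDoubles(v),
               static_cast<float>(v));
      }
      return 0;
    }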
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 07476c6850..8bd3cd3585 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -174,8 +174,10 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+  // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
+  // `is_wide` specifies whether it is long/double or not.
   void PushOntoFPStack(Location source, uint32_t temp_offset,
-                       uint32_t stack_adjustment, bool is_float);
+                       uint32_t stack_adjustment, bool is_fp, bool is_wide);
 
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 8045cc5027..94990402e5 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -41,7 +41,6 @@ void HDeadCodeElimination::Run() {
           && !inst->IsMemoryBarrier()  // If we added an explicit barrier then we should keep it.
           && !inst->HasUses()) {
         block->RemoveInstruction(inst);
-        MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction);
       }
     }
   }
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 3f309c514d..3db2c3ff3f 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -19,7 +19,6 @@
 
 #include "nodes.h"
 #include "optimization.h"
-#include "optimizing_compiler_stats.h"
 
 namespace art {
 
@@ -29,8 +28,8 @@ namespace art {
  */
 class HDeadCodeElimination : public HOptimization {
  public:
-  HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats)
-      : HOptimization(graph, true, kDeadCodeEliminationPassName, stats) {}
+  explicit HDeadCodeElimination(HGraph* graph)
+      : HOptimization(graph, true, kDeadCodeEliminationPassName) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 2216cecc2b..e743d8eca8 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -276,6 +276,17 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
           id));
     }
   }
+
+  // If this is a nested loop, ensure the outer loops contain a superset of the blocks.
+  for (HLoopInformationOutwardIterator it(*loop_header); !it.Done(); it.Advance()) {
+    HLoopInformation* outer_info = it.Current();
+    if (!loop_blocks.IsSubsetOf(&outer_info->GetBlocks())) {
+      AddError(StringPrintf("Blocks of loop defined by header %d are not a subset of blocks of "
+                            "an outer loop defined by header %d.",
+                            loop_header->GetBlockId(),
+                            outer_info->GetHeader()->GetBlockId()));
+    }
+  }
 }
 
 void SSAChecker::VisitInstruction(HInstruction* instruction) {
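The new check enforces the containment invariant of nested natural loops: every block of an inner loop must also belong to each enclosing loop (the `HGraph::InlineInto` change further down fixes code that used to violate it). A self-contained sketch of the invariant, with simplified hypothetical types standing in for ART's `HLoopInformation`:

    #include <bitset>
    #include <cassert>

    constexpr size_t kMaxBlocks = 64;

    // Hypothetical, simplified loop info: the set of member blocks plus a
    // pointer to the innermost enclosing loop (if any).
    struct LoopInfo {
      std::bitset<kMaxBlocks> blocks;
      const LoopInfo* outer = nullptr;
    };

    // The invariant the new SSAChecker code verifies: walking outward from a
    // loop, each enclosing loop's block set is a superset of the inner one's.
    bool CheckNesting(const LoopInfo& loop) {
      for (const LoopInfo* out = loop.outer; out != nullptr; out = out->outer) {
        if ((loop.blocks & ~out->blocks).any()) {
          return false;  // some inner-loop block escapes the outer loop
        }
      }
      return true;
    }

    int main() {
      LoopInfo outer{std::bitset<kMaxBlocks>("11110"), nullptr};  // blocks 1-4
      LoopInfo inner{std::bitset<kMaxBlocks>("01100"), &outer};   // blocks 2-3
      assert(CheckNesting(inner));
      inner.blocks.set(5);           // block 5 is not in the outer loop...
      assert(!CheckNesting(inner));  // ...so the invariant now fails
      return 0;
    }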
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index bffd639e83..6d2a8d77e2 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -190,7 +190,7 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
   }
 
   // Run simple optimizations on the graph.
-  HDeadCodeElimination dce(callee_graph, stats_);
+  HDeadCodeElimination dce(callee_graph);
   HConstantFolding fold(callee_graph);
   InstructionSimplifier simplify(callee_graph, stats_);
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index b8ae1f6369..f30c9a6cef 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -467,7 +467,8 @@ void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) {
     return;
   }
 
-  if (input->IsSub() && input->HasOnlyOneNonEnvironmentUse()) {
+  if (input->IsSub() && input->HasOnlyOneNonEnvironmentUse() &&
+      !Primitive::IsFloatingPointType(input->GetType())) {
     // Replace code looking like
     //    SUB tmp, a, b
     //    NEG dst, tmp
@@ -478,6 +479,7 @@ void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) {
     // worse code. In particular, we do not want the live ranges of `a` and `b`
     // to be extended if we are not sure the initial 'SUB' instruction can be
     // removed.
+    // We do not perform optimization for fp because we could lose the sign of zero.
    HSub* sub = input->AsSub();
     HSub* new_sub =
         new (GetGraph()->GetArena()) HSub(instruction->GetType(), sub->GetRight(), sub->GetLeft());
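The floating-point guard exists because `-(a - b)` and `(b - a)` disagree on the sign of zero: with `a == b`, the subtraction yields `+0.0` and its negation is `-0.0`, while the swapped subtraction still yields `+0.0`. A quick demonstration:

    #include <cmath>
    #include <cstdio>

    int main() {
      double a = 1.0, b = 1.0;
      double neg_sub = -(a - b);  // NEG(SUB a, b): -(+0.0) == -0.0
      double swapped = b - a;     // SUB b, a:      +0.0
      // The two compare equal (IEEE 754: -0.0 == +0.0), but signbit
      // distinguishes them, so the transformation is not sound for fp.
      printf("neg_sub: %s0.0, swapped: %s0.0\n",
             std::signbit(neg_sub) ? "-" : "+",
             std::signbit(swapped) ? "-" : "+");
      printf("compare equal: %d\n", neg_sub == swapped);  // prints 1
      return 0;
    }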
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 3c7a2660db..95ab90de23 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -828,7 +828,7 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
     locations->SetInAt(0, Location::RequiresFpuRegister());
-    locations->SetOut(Location::RequiresFpuRegister());
+    locations->SetOut(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
     return;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 5fca4fab22..4b9d4fc26b 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1064,8 +1064,10 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
         outer_graph->AddBlock(current);
         outer_graph->reverse_post_order_.Put(++index_of_at, current);
         if (info != nullptr) {
-          info->Add(current);
           current->SetLoopInformation(info);
+          for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
+            loop_it.Current()->Add(current);
+          }
         }
       }
     }
@@ -1075,8 +1077,10 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
     outer_graph->AddBlock(to);
     outer_graph->reverse_post_order_.Put(++index_of_at, to);
     if (info != nullptr) {
-      info->Add(to);
       to->SetLoopInformation(info);
+      for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
+        loop_it.Current()->Add(to);
+      }
       if (info->IsBackEdge(*at)) {
         // Only `at` can become a back edge, as the inlined blocks
         // are predecessors of `at`.
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index d9d15c4b18..1565f58977 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2170,7 +2170,7 @@ class HInvoke : public HInstruction {
 
   uint32_t GetDexMethodIndex() const { return dex_method_index_; }
 
-  Intrinsics GetIntrinsic() {
+  Intrinsics GetIntrinsic() const {
     return intrinsic_;
   }
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index ab752c3655..2ec8536cdf 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -320,8 +320,7 @@ static void RunOptimizations(HGraph* graph,
                              const DexCompilationUnit& dex_compilation_unit,
                              PassInfoPrinter* pass_info_printer,
                              StackHandleScopeCollection* handles) {
-  HDeadCodeElimination dce1(graph, stats);
-  HDeadCodeElimination dce2(graph, stats);
+  HDeadCodeElimination dce(graph);
   HConstantFolding fold1(graph);
   InstructionSimplifier simplify1(graph, stats);
   HBooleanSimplifier boolean_not(graph);
@@ -340,7 +339,7 @@ static void RunOptimizations(HGraph* graph,
 
   HOptimization* optimizations[] = {
     &intrinsics,
-    &dce1,
+    &dce,
     &fold1,
     &simplify1,
     // BooleanSimplifier depends on the InstructionSimplifier removing redundant
@@ -353,8 +352,7 @@ static void RunOptimizations(HGraph* graph,
     &licm,
     &bce,
     &type_propagation,
-    &simplify2,
-    &dce2,
+    &simplify2
   };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_info_printer);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index e6508c9851..9bfa543401 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -29,7 +29,6 @@ enum MethodCompilationStat {
   kCompiledBaseline,
   kCompiledOptimized,
   kCompiledQuick,
-  kInstructionSimplifications,
   kInlinedInvoke,
   kNotCompiledUnsupportedIsa,
   kNotCompiledPathological,
@@ -49,8 +48,8 @@ enum MethodCompilationStat {
   kNotCompiledVerifyAtRuntime,
   kNotCompiledClassNotVerified,
   kRemovedCheckedCast,
-  kRemovedDeadInstruction,
   kRemovedNullCheck,
+  kInstructionSimplifications,
   kLastStat
 };
 
@@ -97,7 +96,6 @@ class OptimizingCompilerStats {
       case kCompiledOptimized : return "kCompiledOptimized";
      case kCompiledQuick : return "kCompiledQuick";
       case kInlinedInvoke : return "kInlinedInvoke";
-      case kInstructionSimplifications: return "kInstructionSimplifications";
       case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
       case kNotCompiledPathological : return "kNotCompiledPathological";
       case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
@@ -116,8 +114,8 @@ class OptimizingCompilerStats {
       case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
       case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
       case kRemovedCheckedCast: return "kRemovedCheckedCast";
-      case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
       case kRemovedNullCheck: return "kRemovedNullCheck";
+      case kInstructionSimplifications: return "kInstructionSimplifications";
       default: LOG(FATAL) << "invalid stat";
     }
     return "";
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 6350b35ca1..f8e00f6873 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -903,6 +903,10 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
     return false;
   }
 
+  // We use the first use to compare with other intervals. If this interval
+  // is used after any active intervals, we will spill this interval.
+  size_t first_use = current->FirstUseAfter(current->GetStart());
+
   // First set all registers as not being used.
   size_t* next_use = registers_array_;
   for (size_t i = 0; i < number_of_registers_; ++i) {
@@ -917,7 +921,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
     if (active->IsFixed()) {
       next_use[active->GetRegister()] = current->GetStart();
     } else {
-      size_t use = active->FirstRegisterUseAfter(current->GetStart());
+      size_t use = active->FirstUseAfter(current->GetStart());
       if (use != kNoLifetime) {
         next_use[active->GetRegister()] = use;
       }
@@ -945,7 +949,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
         next_use[inactive->GetRegister()] =
             std::min(next_intersection, next_use[inactive->GetRegister()]);
       } else {
-        size_t use = inactive->FirstRegisterUseAfter(current->GetStart());
+        size_t use = inactive->FirstUseAfter(current->GetStart());
         if (use != kNoLifetime) {
           next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
         }
@@ -959,16 +963,16 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
     DCHECK(current->IsHighInterval());
     reg = current->GetRegister();
     // When allocating the low part, we made sure the high register was available.
-    DCHECK_LT(first_register_use, next_use[reg]);
+    DCHECK_LT(first_use, next_use[reg]);
   } else if (current->IsLowInterval()) {
     reg = FindAvailableRegisterPair(next_use, first_register_use);
     // We should spill if both registers are not available.
-    should_spill = (first_register_use >= next_use[reg])
-      || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
+    should_spill = (first_use >= next_use[reg])
+      || (first_use >= next_use[GetHighForLowRegister(reg)]);
   } else {
     DCHECK(!current->IsHighInterval());
     reg = FindAvailableRegister(next_use);
-    should_spill = (first_register_use >= next_use[reg]);
+    should_spill = (first_use >= next_use[reg]);
   }
 
   DCHECK_NE(reg, kNoRegister);
@@ -998,10 +1002,12 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
       DumpInterval(std::cerr, current);
      DumpAllIntervals(std::cerr);
       // This situation has the potential to infinite loop, so we make it a non-debug CHECK.
+      HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2);
       CHECK(false) << "There is not enough registers available for "
         << split->GetParent()->GetDefinedBy()->DebugName() << " "
         << split->GetParent()->GetDefinedBy()->GetId()
-        << " at " << first_register_use - 1;
+        << " at " << first_register_use - 1 << " "
+        << (at == nullptr ? "" : at->DebugName());
     }
     AddSorted(unhandled_, split);
   }
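The switch from `FirstRegisterUseAfter` to `FirstUseAfter` makes the spill decision consider any next use of an interval, not only uses that demand a register. The surrounding heuristic is classic linear scan: free the register whose holder is used furthest away, unless `current` itself is used even later, in which case spill `current`. A toy model of that decision (illustrative only, not ART's data structures):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Interval {
      size_t next_use;  // position of the first use after the allocation point
    };

    // Toy version of the AllocateBlockedReg decision: pick the register whose
    // current holder is used furthest in the future; spill `current` instead
    // if even that holder is used before `current`'s own first use.
    bool ShouldSpillCurrent(size_t current_first_use,
                            const std::vector<Interval>& active,
                            size_t* victim) {
      assert(!active.empty());
      size_t best = 0;
      for (size_t i = 1; i < active.size(); ++i) {
        if (active[i].next_use > active[best].next_use) best = i;
      }
      *victim = best;
      // If current's first use comes at or after the furthest next use among
      // the active intervals, freeing a register would not help: spill current.
      return current_first_use >= active[best].next_use;
    }

    int main() {
      std::vector<Interval> active = {{10}, {40}, {25}};
      size_t victim;
      bool spill = ShouldSpillCurrent(/* current_first_use */ 12, active, &victim);
      // victim == 1 (next use at 40); current's use at 12 < 40, so evict it
      // instead of spilling current.
      return spill ? 1 : 0;
    }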
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 8eb98a186b..03f5545755 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -131,6 +131,9 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> {
 
   void Dump(std::ostream& stream) const {
     stream << position_;
+    if (is_environment_) {
+      stream << " (env)";
+    }
   }
 
   UsePosition* Dup(ArenaAllocator* allocator) const {
@@ -366,6 +369,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
 
   LiveInterval* GetParent() const { return parent_; }
 
+  // Returns whether this interval is the parent interval, that is, the interval
+  // that starts where the HInstruction is defined.
+  bool IsParent() const { return parent_ == this; }
+
   LiveRange* GetFirstRange() const { return first_range_; }
   LiveRange* GetLastRange() const { return last_range_; }
 
@@ -442,7 +449,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
     if (is_temp_) {
       return position == GetStart() ? position : kNoLifetime;
     }
-    if (position == GetStart() && defined_by_ != nullptr) {
+    if (position == GetStart() && IsParent()) {
       LocationSummary* locations = defined_by_->GetLocations();
       Location location = locations->Out();
       // This interval is the first interval of the instruction. If the output
@@ -491,12 +498,19 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
       return position == GetStart() ? position : kNoLifetime;
     }
 
+    if (position == GetStart() && IsParent()) {
+      if (defined_by_->GetLocations()->Out().IsValid()) {
+        return position;
+      }
+    }
+
     UsePosition* use = first_use_;
     size_t end = GetEnd();
     while (use != nullptr && use->GetPosition() <= end) {
       if (!use->GetIsEnvironment()) {
+        Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
         size_t use_position = use->GetPosition();
-        if (use_position > position) {
+        if (use_position > position && location.IsValid()) {
          return use_position;
         }
       }
@@ -725,7 +739,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
   }
 
   void AddHighInterval(bool is_temp = false) {
-    DCHECK_EQ(GetParent(), this);
+    DCHECK(IsParent());
     DCHECK(!HasHighInterval());
     DCHECK(!HasLowInterval());
     high_or_low_interval_ = new (allocator_) LiveInterval(