Diffstat (limited to 'compiler/optimizing')
40 files changed, 2513 insertions, 1198 deletions
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index 06328f2490..30c89f2d15 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -72,8 +72,8 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) { return graph->GetIntConstant(0); } } else { - // General case when 'cond' is another instruction of type boolean. - DCHECK_EQ(cond->GetType(), Primitive::Type::kPrimBoolean); + // General case when 'cond' is another instruction of type boolean, + // as verified by SSAChecker. return new (allocator) HBooleanNot(cond); } } @@ -120,8 +120,11 @@ void HBooleanSimplifier::Run() { phi->ReplaceWith(replacement); merge_block->RemovePhi(phi); - // Link the start/end blocks and remove empty branches. - graph_->MergeEmptyBranches(block, merge_block); + // Delete the true branch and merge the resulting chain of blocks + // 'block->false_block->merge_block' into one. + true_block->DisconnectAndDelete(); + block->MergeWith(false_block); + block->MergeWith(merge_block); // Remove the original condition if it is now unused. if (!if_condition->HasUses()) { diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 8a64d81485..818d671b5b 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -520,8 +520,24 @@ void HGraphBuilder::Binop_22b(const Instruction& instruction, bool reverse) { UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } +static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, const CompilerDriver& driver) { + // dex compilation unit is null only when unit testing. + if (cu == nullptr) { + return false; + } + + Thread* self = Thread::Current(); + return cu->IsConstructor() + && driver.RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex()); +} + void HGraphBuilder::BuildReturn(const Instruction& instruction, Primitive::Type type) { if (type == Primitive::kPrimVoid) { + // Note that we might insert redundant barriers when inlining `super` calls. + // TODO: add a data flow analysis to get rid of duplicate barriers. + if (RequiresConstructorBarrier(dex_compilation_unit_, *compiler_driver_)) { + current_block_->AddInstruction(new (arena_) HMemoryBarrier(kStoreStore)); + } current_block_->AddInstruction(new (arena_) HReturnVoid()); } else { HInstruction* value = LoadLocal(instruction.VRegA(), type); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 8ab759d393..5163395cac 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -612,7 +612,7 @@ void CodeGenerator::BuildVMapTable(std::vector<uint8_t>* data) const { } void CodeGenerator::BuildStackMaps(std::vector<uint8_t>* data) { - uint32_t size = stack_map_stream_.ComputeNeededSize(); + uint32_t size = stack_map_stream_.PrepareForFillIn(); data->resize(size); MemoryRegion region(data->data(), size); stack_map_stream_.FillIn(region); @@ -654,7 +654,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, if (instruction == nullptr) { // For stack overflow checks. 
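Editor's note: the BuildReturn change above makes a constructor that RequiresConstructorBarrier reports as needing one end with an HMemoryBarrier(kStoreStore) before its return-void. As a hedged, standalone analogue of why that barrier matters (C++ atomics standing in for the emitted barrier; this is not ART code), publishing an object before its field stores are ordered would let a reader observe a stale field:

#include <atomic>

struct Box {
  int final_value;
  explicit Box(int v) : final_value(v) {
    // Analogue of the kStoreStore barrier emitted before the constructor's
    // return: order the field store before any later publication store.
    std::atomic_thread_fence(std::memory_order_release);
  }
};

std::atomic<Box*> shared{nullptr};

void Publisher() {
  shared.store(new Box(42), std::memory_order_relaxed);  // publish the object
}

void Reader() {
  Box* b = shared.load(std::memory_order_acquire);
  if (b != nullptr) {
    int v = b->final_value;  // with the fence in the constructor, this reads 42
    (void)v;
  }
}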
- stack_map_stream_.AddStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, inlining_depth); + stack_map_stream_.BeginStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, inlining_depth); + stack_map_stream_.EndStackMapEntry(); return; } LocationSummary* locations = instruction->GetLocations(); @@ -672,12 +673,12 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } // The register mask must be a subset of callee-save registers. DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); - stack_map_stream_.AddStackMapEntry(dex_pc, - pc_info.native_pc, - register_mask, - locations->GetStackMask(), - environment_size, - inlining_depth); + stack_map_stream_.BeginStackMapEntry(dex_pc, + pc_info.native_pc, + register_mask, + locations->GetStackMask(), + environment_size, + inlining_depth); // Walk over the environment, and record the location of dex registers. for (size_t i = 0; i < environment_size; ++i) { @@ -823,11 +824,14 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, LOG(FATAL) << "Unexpected kind " << location.GetKind(); } } + stack_map_stream_.EndStackMapEntry(); } bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) { HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves(); - return (first_next_not_move != nullptr) && first_next_not_move->CanDoImplicitNullCheck(); + + return (first_next_not_move != nullptr) + && first_next_not_move->CanDoImplicitNullCheckOn(null_check->InputAt(0)); } void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) { @@ -842,7 +846,7 @@ void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) { return; } - if (!instr->CanDoImplicitNullCheck()) { + if (!instr->CanDoImplicitNullCheckOn(instr->InputAt(0))) { return; } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 7e03865b5f..ae1fb537bb 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1214,6 +1214,14 @@ void InstructionCodeGeneratorARM::VisitDoubleConstant(HDoubleConstant* constant) UNUSED(constant); } +void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { + memory_barrier->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { + GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); +} + void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } @@ -3890,9 +3898,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { SlowPathCodeARM* slow_path = nullptr; // Return 0 if `obj` is null. - // TODO: avoid this check if we know obj is not null. - __ cmp(obj, ShifterOperand(0)); - __ b(&zero, EQ); + // avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ cmp(obj, ShifterOperand(0)); + __ b(&zero, EQ); + } // Compare the class of `obj` with `cls`. 
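Editor's note: the ARM VisitInstanceOf/VisitCheckCast hunks above now emit the obj-is-null test only when MustDoNullCheck() returns true. A hedged sketch of the lowering for a final class, using a hypothetical helper rather than ART's codegen:

#include <cstdint>

struct Class;
struct Object { Class* klass; };

// must_do_null_check is false when the compiler proved obj non-null
// (for example, it is `this` or comes straight from a NewInstance).
int32_t InstanceOfFinal(Object* obj, Class* cls, bool must_do_null_check) {
  if (must_do_null_check && obj == nullptr) {
    return 0;                        // the `zero` path
  }
  return obj->klass == cls ? 1 : 0;  // final class: a single pointer compare
}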
__ LoadFromOffset(kLoadWord, out, obj, class_offset); __ cmp(out, ShifterOperand(cls)); @@ -3911,8 +3921,12 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ LoadImmediate(out, 1); __ b(&done); } - __ Bind(&zero); - __ LoadImmediate(out, 0); + + if (instruction->MustDoNullCheck() || instruction->IsClassFinal()) { + __ Bind(&zero); + __ LoadImmediate(out, 0); + } + if (slow_path != nullptr) { __ Bind(slow_path->GetExitLabel()); } @@ -3938,9 +3952,11 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); - // TODO: avoid this check if we know obj is not null. - __ cmp(obj, ShifterOperand(0)); - __ b(slow_path->GetExitLabel(), EQ); + // avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ cmp(obj, ShifterOperand(0)); + __ b(slow_path->GetExitLabel(), EQ); + } // Compare the class of `obj` with `cls`. __ LoadFromOffset(kLoadWord, temp, obj, class_offset); __ cmp(temp, ShifterOperand(cls)); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 06f425ea21..600903621d 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -96,10 +96,10 @@ class InvokeDexCallingConventionVisitor { DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); }; -class ParallelMoveResolverARM : public ParallelMoveResolver { +class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap { public: ParallelMoveResolverARM(ArenaAllocator* allocator, CodeGeneratorARM* codegen) - : ParallelMoveResolver(allocator), codegen_(codegen) {} + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} void EmitMove(size_t index) OVERRIDE; void EmitSwap(size_t index) OVERRIDE; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 4be46126e9..1c6debdded 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -425,30 +425,67 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { CodeGenerator::Finalize(allocator); } -void ParallelMoveResolverARM64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); - codegen_->MoveLocation(move->GetDestination(), move->GetSource()); -} - -void ParallelMoveResolverARM64::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); - codegen_->SwapLocations(move->GetDestination(), move->GetSource()); +void ParallelMoveResolverARM64::PrepareForEmitNativeCode() { + // Note: There are 6 kinds of moves: + // 1. constant -> GPR/FPR (non-cycle) + // 2. constant -> stack (non-cycle) + // 3. GPR/FPR -> GPR/FPR + // 4. GPR/FPR -> stack + // 5. stack -> GPR/FPR + // 6. stack -> stack (non-cycle) + // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5 + // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting + // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the + // dependency. 
+ vixl_temps_.Open(GetVIXLAssembler()); +} + +void ParallelMoveResolverARM64::FinishEmitNativeCode() { + vixl_temps_.Close(); +} + +Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) { + DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister || + kind == Location::kStackSlot || kind == Location::kDoubleStackSlot); + kind = (kind == Location::kFpuRegister) ? Location::kFpuRegister : Location::kRegister; + Location scratch = GetScratchLocation(kind); + if (!scratch.Equals(Location::NoLocation())) { + return scratch; + } + // Allocate from VIXL temp registers. + if (kind == Location::kRegister) { + scratch = LocationFrom(vixl_temps_.AcquireX()); + } else { + DCHECK(kind == Location::kFpuRegister); + scratch = LocationFrom(vixl_temps_.AcquireD()); + } + AddScratchLocation(scratch); + return scratch; } -void ParallelMoveResolverARM64::RestoreScratch(int reg) { - __ Pop(Register(VIXLRegCodeFromART(reg), kXRegSize)); +void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) { + if (loc.IsRegister()) { + vixl_temps_.Release(XRegisterFrom(loc)); + } else { + DCHECK(loc.IsFpuRegister()); + vixl_temps_.Release(DRegisterFrom(loc)); + } + RemoveScratchLocation(loc); } -void ParallelMoveResolverARM64::SpillScratch(int reg) { - __ Push(Register(VIXLRegCodeFromART(reg), kXRegSize)); +void ParallelMoveResolverARM64::EmitMove(size_t index) { + MoveOperands* move = moves_.Get(index); + codegen_->MoveLocation(move->GetDestination(), move->GetSource()); } void CodeGeneratorARM64::GenerateFrameEntry() { + MacroAssembler* masm = GetVIXLAssembler(); + BlockPoolsScope block_pools(masm); __ Bind(&frame_entry_label_); bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod(); if (do_overflow_check) { - UseScratchRegisterScope temps(GetVIXLAssembler()); + UseScratchRegisterScope temps(masm); Register temp = temps.AcquireX(); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); @@ -474,6 +511,7 @@ void CodeGeneratorARM64::GenerateFrameEntry() { } void CodeGeneratorARM64::GenerateFrameExit() { + BlockPoolsScope block_pools(GetVIXLAssembler()); GetAssembler()->cfi().RememberState(); if (!HasEmptyFrame()) { int frame_size = GetFrameSize(); @@ -726,10 +764,10 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri if (destination.IsRegister()) { __ Mov(Register(dst), RegisterFrom(source, type)); } else { + DCHECK(destination.IsFpuRegister()); __ Fmov(FPRegister(dst), FPRegisterFrom(source, type)); } } - } else { // The destination is not a register. It must be a stack slot. 
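Editor's note: the ParallelMoveResolverNoSwap methods above resolve dependency cycles by grabbing a scratch location (a spare VIXL temp) instead of emitting swaps. A hedged sketch of the idea on plain integers rather than Locations:

#include <cstddef>

// Break the move cycle r[order[0]] <- r[order[1]] <- ... <- r[order[n-1]] <- r[order[0]]
// with one scratch value and plain moves, the way a no-swap resolver does.
void ResolveCycle(int regs[], const size_t order[], size_t n) {
  if (n == 0) return;
  int scratch = regs[order[0]];           // AllocateScratchLocationFor(...)
  for (size_t i = 0; i + 1 < n; ++i) {
    regs[order[i]] = regs[order[i + 1]];  // EmitMove along the cycle
  }
  regs[order[n - 1]] = scratch;           // last move, then FreeScratchLocation(...)
}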
DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot()); if (source.IsRegister() || source.IsFpuRegister()) { @@ -772,67 +810,6 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri } } -void CodeGeneratorARM64::SwapLocations(Location loc1, Location loc2) { - DCHECK(!loc1.IsConstant()); - DCHECK(!loc2.IsConstant()); - - if (loc1.Equals(loc2)) { - return; - } - - UseScratchRegisterScope temps(GetAssembler()->vixl_masm_); - - bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot(); - bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot(); - bool is_fp_reg1 = loc1.IsFpuRegister(); - bool is_fp_reg2 = loc2.IsFpuRegister(); - - if (loc2.IsRegister() && loc1.IsRegister()) { - Register r1 = XRegisterFrom(loc1); - Register r2 = XRegisterFrom(loc2); - Register tmp = temps.AcquireSameSizeAs(r1); - __ Mov(tmp, r2); - __ Mov(r2, r1); - __ Mov(r1, tmp); - } else if (is_fp_reg2 && is_fp_reg1) { - FPRegister r1 = DRegisterFrom(loc1); - FPRegister r2 = DRegisterFrom(loc2); - FPRegister tmp = temps.AcquireSameSizeAs(r1); - __ Fmov(tmp, r2); - __ Fmov(r2, r1); - __ Fmov(r1, tmp); - } else if (is_slot1 != is_slot2) { - MemOperand mem = StackOperandFrom(is_slot1 ? loc1 : loc2); - Location reg_loc = is_slot1 ? loc2 : loc1; - CPURegister reg, tmp; - if (reg_loc.IsFpuRegister()) { - reg = DRegisterFrom(reg_loc); - tmp = temps.AcquireD(); - } else { - reg = XRegisterFrom(reg_loc); - tmp = temps.AcquireX(); - } - __ Ldr(tmp, mem); - __ Str(reg, mem); - if (reg_loc.IsFpuRegister()) { - __ Fmov(FPRegister(reg), FPRegister(tmp)); - } else { - __ Mov(Register(reg), Register(tmp)); - } - } else if (is_slot1 && is_slot2) { - MemOperand mem1 = StackOperandFrom(loc1); - MemOperand mem2 = StackOperandFrom(loc2); - Register tmp1 = loc1.IsStackSlot() ? 
temps.AcquireW() : temps.AcquireX(); - Register tmp2 = temps.AcquireSameSizeAs(tmp1); - __ Ldr(tmp1, mem1); - __ Ldr(tmp2, mem2); - __ Str(tmp1, mem2); - __ Str(tmp2, mem1); - } else { - LOG(FATAL) << "Unimplemented"; - } -} - void CodeGeneratorARM64::Load(Primitive::Type type, CPURegister dst, const MemOperand& src) { @@ -865,7 +842,9 @@ void CodeGeneratorARM64::Load(Primitive::Type type, void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, CPURegister dst, const MemOperand& src) { - UseScratchRegisterScope temps(GetVIXLAssembler()); + MacroAssembler* masm = GetVIXLAssembler(); + BlockPoolsScope block_pools(masm); + UseScratchRegisterScope temps(masm); Register temp_base = temps.AcquireX(); Primitive::Type type = instruction->GetType(); @@ -995,6 +974,7 @@ void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path) { + BlockPoolsScope block_pools(GetVIXLAssembler()); __ Ldr(lr, MemOperand(tr, entry_point_offset)); __ Blr(lr); if (instruction != nullptr) { @@ -1130,6 +1110,83 @@ void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { } } +void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + if (Primitive::IsFloatingPointType(instruction->GetType())) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } +} + +void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + BlockPoolsScope block_pools(GetVIXLAssembler()); + + MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset()); + bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease(); + + if (field_info.IsVolatile()) { + if (use_acquire_release) { + // NB: LoadAcquire will record the pc info if needed. + codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field); + } else { + codegen_->Load(field_info.GetFieldType(), OutputCPURegister(instruction), field); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // For IRIW sequential consistency kLoadAny is not sufficient. 
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + codegen_->Load(field_info.GetFieldType(), OutputCPURegister(instruction), field); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } +} + +void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { + locations->SetInAt(1, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + BlockPoolsScope block_pools(GetVIXLAssembler()); + + Register obj = InputRegisterAt(instruction, 0); + CPURegister value = InputCPURegisterAt(instruction, 1); + Offset offset = field_info.GetFieldOffset(); + Primitive::Type field_type = field_info.GetFieldType(); + bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease(); + + if (field_info.IsVolatile()) { + if (use_acquire_release) { + codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } else { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->Store(field_type, value, HeapOperand(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + codegen_->Store(field_type, value, HeapOperand(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + codegen_->MarkGCCard(obj, Register(value)); + } +} + void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { Primitive::Type type = instr->GetType(); @@ -1264,7 +1321,9 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { Location index = locations->InAt(1); size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); MemOperand source = HeapOperand(obj); - UseScratchRegisterScope temps(GetVIXLAssembler()); + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + BlockPoolsScope block_pools(masm); if (index.IsConstant()) { offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); @@ -1287,22 +1346,23 @@ void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) { } void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) { + BlockPoolsScope block_pools(GetVIXLAssembler()); __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), mirror::Array::LengthOffset())); codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { - Primitive::Type value_type = instruction->GetComponentType(); - bool is_object = value_type == Primitive::kPrimNot; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, is_object ? 
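Editor's note: HandleFieldGet/HandleFieldSet above choose between acquire/release instructions and explicit kAnyStore/kAnyAny barriers based on PreferAcquireRelease(). A loose C++ analogue of the two strategies (std::atomic stands in for the generated code; the exact memory_order mapping is an assumption, not ART's contract):

#include <atomic>

std::atomic<int> field{0};

// Acquire/release flavour: one ordered load/store (ldar/stlr on ARMv8).
int  VolatileGetAcqRel()      { return field.load(std::memory_order_seq_cst); }
void VolatileSetAcqRel(int v) { field.store(v, std::memory_order_seq_cst); }

// Barrier flavour, mirroring the non-acquire-release path above: a plain access
// bracketed by fences. The trailing full fence (kAnyAny) on the load side is the
// "IRIW" point from the comment: an acquire-only barrier is not enough to keep
// independent reads of independent writes in one global order.
int VolatileGetWithFences() {
  int v = field.load(std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);   // kAnyAny
  return v;
}

void VolatileSetWithFences(int v) {
  std::atomic_thread_fence(std::memory_order_release);   // kAnyStore
  field.store(v, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);   // kAnyAny
}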
LocationSummary::kCall : LocationSummary::kNoCall); - if (is_object) { + if (instruction->NeedsTypeCheck()) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); } else { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { @@ -1315,31 +1375,42 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { Primitive::Type value_type = instruction->GetComponentType(); - if (value_type == Primitive::kPrimNot) { + LocationSummary* locations = instruction->GetLocations(); + bool needs_runtime_call = locations->WillCall(); + + if (needs_runtime_call) { codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc(), nullptr); CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } else { - LocationSummary* locations = instruction->GetLocations(); Register obj = InputRegisterAt(instruction, 0); CPURegister value = InputCPURegisterAt(instruction, 2); Location index = locations->InAt(1); size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value(); MemOperand destination = HeapOperand(obj); - UseScratchRegisterScope temps(GetVIXLAssembler()); + MacroAssembler* masm = GetVIXLAssembler(); + BlockPoolsScope block_pools(masm); + { + // We use a block to end the scratch scope before the write barrier, thus + // freeing the temporary registers so they can be used in `MarkGCCard`. + UseScratchRegisterScope temps(masm); + + if (index.IsConstant()) { + offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); + destination = HeapOperand(obj, offset); + } else { + Register temp = temps.AcquireSameSizeAs(obj); + Register index_reg = InputRegisterAt(instruction, 1); + __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(value_type))); + destination = HeapOperand(temp, offset); + } - if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); - destination = HeapOperand(obj, offset); - } else { - Register temp = temps.AcquireSameSizeAs(obj); - Register index_reg = InputRegisterAt(instruction, 1); - __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(value_type))); - destination = HeapOperand(temp, offset); + codegen_->Store(value_type, value, destination); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + if (CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue())) { + codegen_->MarkGCCard(obj, value.W()); } - - codegen_->Store(value_type, value, destination); - codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -1381,8 +1452,10 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { instruction, locations->InAt(1), LocationFrom(obj_cls), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); - // TODO: avoid this check if we know obj is not null. 
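Editor's note: the rewritten ARM64 VisitArraySet above closes the scratch-register scope before calling MarkGCCard, so the card-marking write barrier can acquire temps of its own. As a hedged sketch of what such a barrier does (the card size and dirty value below are assumptions, not ART's actual constants):

#include <cstdint>

constexpr uintptr_t kAssumedCardShift = 10;   // 1 KiB cards, assumed
constexpr uint8_t   kAssumedCardDirty = 0x70; // dirty marker, assumed

// After storing a reference into `holder`, dirty the card covering `holder`
// so the garbage collector knows to rescan that region.
void MarkCard(uint8_t* card_table_begin, const void* holder) {
  uintptr_t addr = reinterpret_cast<uintptr_t>(holder);
  card_table_begin[addr >> kAssumedCardShift] = kAssumedCardDirty;
}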
- __ Cbz(obj, slow_path->GetExitLabel()); + // Avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ Cbz(obj, slow_path->GetExitLabel()); + } // Compare the class of `obj` with `cls`. __ Ldr(obj_cls, HeapOperand(obj, mirror::Object::ClassOffset())); __ Cmp(obj_cls, cls); @@ -1750,72 +1823,19 @@ void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { } void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - if (Primitive::IsFloatingPointType(instruction->GetType())) { - locations->SetOut(Location::RequiresFpuRegister()); - } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } + HandleFieldGet(instruction); } void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { - MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset()); - bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease(); - - if (instruction->IsVolatile()) { - if (use_acquire_release) { - // NB: LoadAcquire will record the pc info if needed. - codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field); - } else { - codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); - codegen_->MaybeRecordImplicitNullCheck(instruction); - // For IRIW sequential consistency kLoadAny is not sufficient. - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); - } - } else { - codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); - codegen_->MaybeRecordImplicitNullCheck(instruction); - } + HandleFieldGet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { - locations->SetInAt(1, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } + HandleFieldSet(instruction); } void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - Register obj = InputRegisterAt(instruction, 0); - CPURegister value = InputCPURegisterAt(instruction, 1); - Offset offset = instruction->GetFieldOffset(); - Primitive::Type field_type = instruction->GetFieldType(); - bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease(); - - if (instruction->IsVolatile()) { - if (use_acquire_release) { - codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - } else { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); - codegen_->Store(field_type, value, HeapOperand(obj, offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); - } - } else { - codegen_->Store(field_type, value, HeapOperand(obj, offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - } - - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - codegen_->MarkGCCard(obj, Register(value)); - } + HandleFieldSet(instruction, instruction->GetFieldInfo()); } void 
LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { @@ -1837,9 +1857,11 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { vixl::Label done; // Return 0 if `obj` is null. - // TODO: Avoid this check if we know `obj` is not null. - __ Mov(out, 0); - __ Cbz(obj, &done); + // Avoid null check if we know `obj` is not null. + if (instruction->MustDoNullCheck()) { + __ Mov(out, 0); + __ Cbz(obj, &done); + } // Compare the class of `obj` with `cls`. __ Ldr(out, HeapOperand(obj, mirror::Object::ClassOffset())); @@ -1914,7 +1936,9 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok // The register ip1 is required to be used for the hidden argument in // art_quick_imt_conflict_trampoline, so prevent VIXL from using it. - UseScratchRegisterScope scratch_scope(GetVIXLAssembler()); + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope scratch_scope(masm); + BlockPoolsScope block_pools(masm); scratch_scope.Exclude(ip1); __ Mov(ip1, invoke->GetDexMethodIndex()); @@ -2000,6 +2024,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir return; } + BlockPoolsScope block_pools(GetVIXLAssembler()); Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0)); codegen_->GenerateStaticOrDirectCall(invoke, temp); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -2018,6 +2043,8 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { Offset class_offset = mirror::Object::ClassOffset(); Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize); + BlockPoolsScope block_pools(GetVIXLAssembler()); + // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ Ldr(temp, MemOperand(sp, receiver.GetStackIndex())); @@ -2318,8 +2345,9 @@ void InstructionCodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instru if (codegen_->CanMoveNullCheckToUser(instruction)) { return; } - Location obj = instruction->GetLocations()->InAt(0); + BlockPoolsScope block_pools(GetVIXLAssembler()); + Location obj = instruction->GetLocations()->InAt(0); __ Ldr(wzr, HeapOperandFrom(obj, Offset(0))); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } @@ -2446,6 +2474,14 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { } } +void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { + memory_barrier->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { + GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); +} + void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); Primitive::Type return_type = instruction->InputAt(0)->GetType(); @@ -2519,67 +2555,19 @@ void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) { } void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - if (Primitive::IsFloatingPointType(instruction->GetType())) { - locations->SetOut(Location::RequiresFpuRegister()); - } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } + HandleFieldGet(instruction); } void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - MemOperand field = 
HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset()); - bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease(); - - if (instruction->IsVolatile()) { - if (use_acquire_release) { - // NB: LoadAcquire will record the pc info if needed. - codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field); - } else { - codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); - // For IRIW sequential consistency kLoadAny is not sufficient. - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); - } - } else { - codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); - } + HandleFieldGet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { - locations->SetInAt(1, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } + HandleFieldSet(instruction); } void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - Register cls = InputRegisterAt(instruction, 0); - CPURegister value = InputCPURegisterAt(instruction, 1); - Offset offset = instruction->GetFieldOffset(); - Primitive::Type field_type = instruction->GetFieldType(); - bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease(); - - if (instruction->IsVolatile()) { - if (use_acquire_release) { - codegen_->StoreRelease(field_type, value, HeapOperand(cls, offset)); - } else { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); - codegen_->Store(field_type, value, HeapOperand(cls, offset)); - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); - } - } else { - codegen_->Store(field_type, value, HeapOperand(cls, offset)); - } - - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - codegen_->MarkGCCard(cls, Register(value)); - } + HandleFieldSet(instruction, instruction->GetFieldInfo()); } void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 07c6dd059a..5a358671cc 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -159,6 +159,8 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void HandleShift(HBinaryOperation* instr); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); @@ -185,8 +187,10 @@ class LocationsBuilderARM64 : public HGraphVisitor { private: void HandleBinaryOp(HBinaryOperation* instr); - void HandleShift(HBinaryOperation* instr); + void HandleFieldSet(HInstruction* instruction); + void HandleFieldGet(HInstruction* instruction); void HandleInvoke(HInvoke* instr); + void HandleShift(HBinaryOperation* instr); CodeGeneratorARM64* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -194,15 
+198,17 @@ class LocationsBuilderARM64 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64); }; -class ParallelMoveResolverARM64 : public ParallelMoveResolver { +class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap { public: ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen) - : ParallelMoveResolver(allocator), codegen_(codegen) {} + : ParallelMoveResolverNoSwap(allocator), codegen_(codegen), vixl_temps_() {} + protected: + void PrepareForEmitNativeCode() OVERRIDE; + void FinishEmitNativeCode() OVERRIDE; + Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE; + void FreeScratchLocation(Location loc) OVERRIDE; void EmitMove(size_t index) OVERRIDE; - void EmitSwap(size_t index) OVERRIDE; - void RestoreScratch(int reg) OVERRIDE; - void SpillScratch(int reg) OVERRIDE; private: Arm64Assembler* GetAssembler() const; @@ -211,6 +217,7 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolver { } CodeGeneratorARM64* const codegen_; + vixl::UseScratchRegisterScope vixl_temps_; DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64); }; @@ -318,7 +325,6 @@ class CodeGeneratorARM64 : public CodeGenerator { // locations, and is used for optimisation and debugging. void MoveLocation(Location destination, Location source, Primitive::Type type = Primitive::kPrimVoid); - void SwapLocations(Location loc_1, Location loc_2); void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); void LoadCurrentMethod(vixl::Register current_method); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0f1175563e..c604842d86 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -877,7 +877,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio if (rhs.IsRegister()) { __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>()); } else if (rhs.IsConstant()) { - int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue(); + int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); if (constant == 0) { __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); } else { @@ -1120,6 +1120,14 @@ void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant) UNUSED(constant); } +void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { + memory_barrier->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { + GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); +} + void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } @@ -1212,6 +1220,7 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec codegen_->GenerateStaticOrDirectCall( invoke, invoke->GetLocations()->GetTemp(0).AsRegister<Register>()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -1547,10 +1556,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimLong: // Processing a Dex `long-to-float' instruction. 
- locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); + locations->SetInAt(0, Location::Any()); + locations->SetOut(Location::Any()); break; case Primitive::kPrimDouble: @@ -1580,10 +1587,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimLong: // Processing a Dex `long-to-double' instruction. - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); + locations->SetInAt(0, Location::Any()); + locations->SetOut(Location::Any()); break; case Primitive::kPrimFloat: @@ -1804,37 +1809,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimLong: { // Processing a Dex `long-to-float' instruction. - Register low = in.AsRegisterPairLow<Register>(); - Register high = in.AsRegisterPairHigh<Register>(); - XmmRegister result = out.AsFpuRegister<XmmRegister>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); - - // Operations use doubles for precision reasons (each 32-bit - // half of a long fits in the 53-bit mantissa of a double, - // but not in the 24-bit mantissa of a float). This is - // especially important for the low bits. The result is - // eventually converted to float. - - // low = low - 2^31 (to prevent bit 31 of `low` to be - // interpreted as a sign bit) - __ subl(low, Immediate(0x80000000)); - // temp = int-to-double(high) - __ cvtsi2sd(temp, high); - // temp = temp * 2^32 - __ LoadLongConstant(constant, k2Pow32EncodingForDouble); - __ mulsd(temp, constant); - // result = int-to-double(low) - __ cvtsi2sd(result, low); - // result = result + 2^31 (restore the original value of `low`) - __ LoadLongConstant(constant, k2Pow31EncodingForDouble); - __ addsd(result, constant); - // result = result + temp - __ addsd(result, temp); - // result = double-to-float(result) - __ cvtsd2ss(result, result); - // Restore low. - __ addl(low, Immediate(0x80000000)); + size_t adjustment = 0; + + // Create stack space for the call to + // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below. + // TODO: enhance register allocator to ask for stack temporaries. + if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) { + adjustment = Primitive::ComponentSize(Primitive::kPrimLong); + __ subl(ESP, Immediate(adjustment)); + } + + // Load the value to the FP stack, using temporaries if needed. + PushOntoFPStack(in, 0, adjustment, false, true); + + if (out.IsStackSlot()) { + __ fstps(Address(ESP, out.GetStackIndex() + adjustment)); + } else { + __ fstps(Address(ESP, 0)); + Location stack_temp = Location::StackSlot(0); + codegen_->Move32(out, stack_temp); + } + + // Remove the temporary stack space we allocated. + if (adjustment != 0) { + __ addl(ESP, Immediate(adjustment)); + } break; } @@ -1863,29 +1862,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimLong: { // Processing a Dex `long-to-double' instruction. 
- Register low = in.AsRegisterPairLow<Register>(); - Register high = in.AsRegisterPairHigh<Register>(); - XmmRegister result = out.AsFpuRegister<XmmRegister>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); - - // low = low - 2^31 (to prevent bit 31 of `low` to be - // interpreted as a sign bit) - __ subl(low, Immediate(0x80000000)); - // temp = int-to-double(high) - __ cvtsi2sd(temp, high); - // temp = temp * 2^32 - __ LoadLongConstant(constant, k2Pow32EncodingForDouble); - __ mulsd(temp, constant); - // result = int-to-double(low) - __ cvtsi2sd(result, low); - // result = result + 2^31 (restore the original value of `low`) - __ LoadLongConstant(constant, k2Pow31EncodingForDouble); - __ addsd(result, constant); - // result = result + temp - __ addsd(result, temp); - // Restore low. - __ addl(low, Immediate(0x80000000)); + size_t adjustment = 0; + + // Create stack space for the call to + // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below. + // TODO: enhance register allocator to ask for stack temporaries. + if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) { + adjustment = Primitive::ComponentSize(Primitive::kPrimLong); + __ subl(ESP, Immediate(adjustment)); + } + + // Load the value to the FP stack, using temporaries if needed. + PushOntoFPStack(in, 0, adjustment, false, true); + + if (out.IsDoubleStackSlot()) { + __ fstpl(Address(ESP, out.GetStackIndex() + adjustment)); + } else { + __ fstpl(Address(ESP, 0)); + Location stack_temp = Location::DoubleStackSlot(0); + codegen_->Move64(out, stack_temp); + } + + // Remove the temporary stack space we allocated. + if (adjustment != 0) { + __ addl(ESP, Immediate(adjustment)); + } break; } @@ -2225,24 +2226,43 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { } } -void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, uint32_t temp_offset, - uint32_t stack_adjustment, bool is_float) { +void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, + uint32_t temp_offset, + uint32_t stack_adjustment, + bool is_fp, + bool is_wide) { if (source.IsStackSlot()) { - DCHECK(is_float); - __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + DCHECK(!is_wide); + if (is_fp) { + __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else { + __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } } else if (source.IsDoubleStackSlot()) { - DCHECK(!is_float); - __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + DCHECK(is_wide); + if (is_fp) { + __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else { + __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } } else { // Write the value to the temporary location on the stack and load to FP stack. 
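Editor's note: the new x86 long-to-float and long-to-double paths above push the 64-bit integer onto the x87 stack (fildl, via PushOntoFPStack with is_fp=false, is_wide=true) and pop it with fstps/fstpl, replacing the old SSE sequence that had to split the long into halves and work in double precision. A hedged note on what that computes, as an illustration rather than the generated code: an x87 register holds any int64 exactly, so the only rounding happens at the store, matching a direct correctly-rounded cast:

#include <cstdint>

float LongToFloat(int64_t v) {    // fildl + fstps: one rounding, at the store
  return static_cast<float>(v);
}

double LongToDouble(int64_t v) {  // fildl + fstpl
  return static_cast<double>(v);
}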
- if (is_float) { + if (!is_wide) { Location stack_temp = Location::StackSlot(temp_offset); codegen_->Move32(stack_temp, source); - __ flds(Address(ESP, temp_offset)); + if (is_fp) { + __ flds(Address(ESP, temp_offset)); + } else { + __ filds(Address(ESP, temp_offset)); + } } else { Location stack_temp = Location::DoubleStackSlot(temp_offset); codegen_->Move64(stack_temp, source); - __ fldl(Address(ESP, temp_offset)); + if (is_fp) { + __ fldl(Address(ESP, temp_offset)); + } else { + __ fildl(Address(ESP, temp_offset)); + } } } } @@ -2261,8 +2281,9 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { __ subl(ESP, Immediate(2 * elem_size)); // Load the values to the FP stack in reverse order, using temporaries if needed. - PushOntoFPStack(second, elem_size, 2 * elem_size, is_float); - PushOntoFPStack(first, 0, 2 * elem_size, is_float); + const bool is_wide = !is_float; + PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp */ true, is_wide); + PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide); // Loop doing FPREM until we stabilize. Label retry; @@ -3098,7 +3119,6 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, } DCHECK(!IsLeafMethod()); - RecordPcInfo(invoke, invoke->GetDexPc()); } void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) { @@ -4230,9 +4250,11 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { SlowPathCodeX86* slow_path = nullptr; // Return 0 if `obj` is null. - // TODO: avoid this check if we know obj is not null. - __ testl(obj, obj); - __ j(kEqual, &zero); + // Avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ testl(obj, obj); + __ j(kEqual, &zero); + } __ movl(out, Address(obj, class_offset)); // Compare the class of `obj` with `cls`. if (cls.IsRegister()) { @@ -4257,8 +4279,12 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ movl(out, Immediate(1)); __ jmp(&done); } - __ Bind(&zero); - __ movl(out, Immediate(0)); + + if (instruction->MustDoNullCheck() || instruction->IsClassFinal()) { + __ Bind(&zero); + __ movl(out, Immediate(0)); + } + if (slow_path != nullptr) { __ Bind(slow_path->GetExitLabel()); } @@ -4283,11 +4309,13 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); - // TODO: avoid this check if we know obj is not null. - __ testl(obj, obj); - __ j(kEqual, slow_path->GetExitLabel()); - __ movl(temp, Address(obj, class_offset)); + // Avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ testl(obj, obj); + __ j(kEqual, slow_path->GetExitLabel()); + } + __ movl(temp, Address(obj, class_offset)); // Compare the class of `obj` with `cls`. 
if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<Register>()); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 368ae0fb0e..8bd3cd3585 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -93,10 +93,10 @@ class InvokeDexCallingConventionVisitor { DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); }; -class ParallelMoveResolverX86 : public ParallelMoveResolver { +class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap { public: ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen) - : ParallelMoveResolver(allocator), codegen_(codegen) {} + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} void EmitMove(size_t index) OVERRIDE; void EmitSwap(size_t index) OVERRIDE; @@ -174,8 +174,10 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not. + // `is_wide` specifies whether it is long/double or not. void PushOntoFPStack(Location source, uint32_t temp_offset, - uint32_t stack_adjustment, bool is_float); + uint32_t stack_adjustment, bool is_fp, bool is_wide); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5b681fa62b..47425fb9ae 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1023,14 +1023,14 @@ void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { switch (compare->InputAt(0)->GetType()) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(compare->InputAt(1))); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::RequiresRegister()); break; } @@ -1052,24 +1052,46 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { CpuRegister left_reg = left.AsRegister<CpuRegister>(); if (right.IsConstant()) { int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); - DCHECK(IsInt<32>(value)); - if (value == 0) { - __ testq(left_reg, left_reg); + if (IsInt<32>(value)) { + if (value == 0) { + __ testq(left_reg, left_reg); + } else { + __ cmpq(left_reg, Immediate(static_cast<int32_t>(value))); + } } else { - __ cmpq(left_reg, Immediate(static_cast<int32_t>(value))); + // Value won't fit in an int. 
+ __ cmpq(left_reg, codegen_->LiteralInt64Address(value)); } + } else if (right.IsDoubleStackSlot()) { + __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); } else { __ cmpq(left_reg, right.AsRegister<CpuRegister>()); } break; } case Primitive::kPrimFloat: { - __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); + XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); + if (right.IsConstant()) { + float value = right.GetConstant()->AsFloatConstant()->GetValue(); + __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value)); + } else if (right.IsStackSlot()) { + __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); + } else { + __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>()); + } __ j(kUnordered, compare->IsGtBias() ? &greater : &less); break; } case Primitive::kPrimDouble: { - __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); + XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); + if (right.IsConstant()) { + double value = right.GetConstant()->AsDoubleConstant()->GetValue(); + __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value)); + } else if (right.IsDoubleStackSlot()) { + __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); + } else { + __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>()); + } __ j(kUnordered, compare->IsGtBias() ? &greater : &less); break; } @@ -1145,6 +1167,14 @@ void InstructionCodeGeneratorX86_64::VisitDoubleConstant(HDoubleConstant* consta UNUSED(constant); } +void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { + memory_barrier->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { + GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); +} + void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } @@ -1170,8 +1200,7 @@ void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: - locations->SetInAt(0, - Location::FpuRegisterLocation(XMM0)); + locations->SetInAt(0, Location::FpuRegisterLocation(XMM0)); break; default: @@ -1411,7 +1440,6 @@ void LocationsBuilderX86_64::VisitNeg(HNeg* neg) { case Primitive::kPrimDouble: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); break; @@ -1439,26 +1467,22 @@ void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) { case Primitive::kPrimFloat: { DCHECK(in.Equals(out)); - CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>(); - XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); // Implement float negation with an exclusive or with value // 0x80000000 (mask for bit 31, representing the sign of a // single-precision floating-point number). 
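Editor's note: the VisitCompare hunk earlier in this file keeps the kUnordered branch keyed on IsGtBias(), which is how the Dex cmpg/cmpl NaN rule is encoded; only the operand forms changed. A hedged scalar restatement of that rule (illustrative helper, not ART code):

#include <cmath>
#include <cstdint>

// Dex cmpg-float yields +1 when either operand is NaN (gt bias);
// cmpl-float yields -1 (lt bias). Otherwise the usual three-way result.
int32_t CompareFloat(float a, float b, bool gt_bias) {
  if (std::isnan(a) || std::isnan(b)) {
    return gt_bias ? 1 : -1;
  }
  if (a > b) return 1;
  if (a < b) return -1;
  return 0;
}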
- __ movq(constant, Immediate(INT64_C(0x80000000))); - __ movd(mask, constant); + __ movss(mask, codegen_->LiteralInt32Address(0x80000000)); __ xorps(out.AsFpuRegister<XmmRegister>(), mask); break; } case Primitive::kPrimDouble: { DCHECK(in.Equals(out)); - CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>(); - XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); // Implement double negation with an exclusive or with value // 0x8000000000000000 (mask for bit 63, representing the sign of // a double-precision floating-point number). - __ movq(constant, Immediate(INT64_C(0x8000000000000000))); - __ movd(mask, constant); + __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000))); __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); break; } @@ -1605,19 +1629,19 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimInt: case Primitive::kPrimChar: // Processing a Dex `int-to-float' instruction. - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); break; case Primitive::kPrimLong: // Processing a Dex `long-to-float' instruction. - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); break; case Primitive::kPrimDouble: // Processing a Dex `double-to-float' instruction. - locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -1636,19 +1660,19 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimInt: case Primitive::kPrimChar: // Processing a Dex `int-to-double' instruction. - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); break; case Primitive::kPrimLong: // Processing a Dex `long-to-double' instruction. - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); break; case Primitive::kPrimFloat: // Processing a Dex `float-to-double' instruction. - locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -1902,17 +1926,56 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver case Primitive::kPrimInt: case Primitive::kPrimChar: // Processing a Dex `int-to-float' instruction. - __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); + if (in.IsRegister()) { + __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); + } else if (in.IsConstant()) { + int32_t v = in.GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dest = out.AsFpuRegister<XmmRegister>(); + if (v == 0) { + __ xorps(dest, dest); + } else { + __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v))); + } + } else { + __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex()), false); + } break; case Primitive::kPrimLong: // Processing a Dex `long-to-float' instruction. 
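Editor's note: the VisitNeg change above drops the general-purpose temp and instead xors the value with a sign-bit mask taken from the constant area (LiteralInt32Address/LiteralInt64Address). A hedged scalar sketch of the same bit trick:

#include <cstdint>
#include <cstring>

float NegateFloat(float f) {        // xorps with 0x80000000: flip bit 31
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  bits ^= UINT32_C(0x80000000);
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

double NegateDouble(double d) {     // xorpd with bit 63 set: flip the sign
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof(bits));
  bits ^= UINT64_C(0x8000000000000000);
  std::memcpy(&d, &bits, sizeof(d));
  return d;
}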
- __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); + if (in.IsRegister()) { + __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); + } else if (in.IsConstant()) { + int64_t v = in.GetConstant()->AsLongConstant()->GetValue(); + XmmRegister dest = out.AsFpuRegister<XmmRegister>(); + if (v == 0) { + __ xorps(dest, dest); + } else { + __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v))); + } + } else { + __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex()), true); + } break; case Primitive::kPrimDouble: // Processing a Dex `double-to-float' instruction. - __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); + if (in.IsFpuRegister()) { + __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); + } else if (in.IsConstant()) { + double v = in.GetConstant()->AsDoubleConstant()->GetValue(); + XmmRegister dest = out.AsFpuRegister<XmmRegister>(); + if (bit_cast<int64_t, double>(v) == 0) { + __ xorps(dest, dest); + } else { + __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v))); + } + } else { + __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex())); + } break; default: @@ -1930,17 +1993,56 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver case Primitive::kPrimInt: case Primitive::kPrimChar: // Processing a Dex `int-to-double' instruction. - __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); + if (in.IsRegister()) { + __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); + } else if (in.IsConstant()) { + int32_t v = in.GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dest = out.AsFpuRegister<XmmRegister>(); + if (v == 0) { + __ xorpd(dest, dest); + } else { + __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v))); + } + } else { + __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex()), false); + } break; case Primitive::kPrimLong: // Processing a Dex `long-to-double' instruction. - __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); + if (in.IsRegister()) { + __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); + } else if (in.IsConstant()) { + int64_t v = in.GetConstant()->AsLongConstant()->GetValue(); + XmmRegister dest = out.AsFpuRegister<XmmRegister>(); + if (v == 0) { + __ xorpd(dest, dest); + } else { + __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v))); + } + } else { + __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex()), true); + } break; case Primitive::kPrimFloat: // Processing a Dex `float-to-double' instruction. 
- __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); + if (in.IsFpuRegister()) { + __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); + } else if (in.IsConstant()) { + float v = in.GetConstant()->AsFloatConstant()->GetValue(); + XmmRegister dest = out.AsFpuRegister<XmmRegister>(); + if (bit_cast<int32_t, float>(v) == 0) { + __ xorpd(dest, dest); + } else { + __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v))); + } + } else { + __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex())); + } break; default: @@ -3120,7 +3222,7 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { locations->SetInAt(1, Location::RequiresFpuRegister()); } else { - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1))); } if (needs_write_barrier) { // Temporary registers for the write barrier. @@ -3147,24 +3249,46 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); + if (value.IsConstant()) { + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movb(Address(base, offset), Immediate(v)); + } else { + __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); + } break; } case Primitive::kPrimShort: case Primitive::kPrimChar: { - __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); + if (value.IsConstant()) { + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movw(Address(base, offset), Immediate(v)); + } else { + __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); + } break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - __ movl(Address(base, offset), value.AsRegister<CpuRegister>()); + if (value.IsConstant()) { + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movw(Address(base, offset), Immediate(v)); + } else { + __ movl(Address(base, offset), value.AsRegister<CpuRegister>()); + } break; } case Primitive::kPrimLong: { - __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); + if (value.IsConstant()) { + int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); + DCHECK(IsInt<32>(v)); + int32_t v_32 = v; + __ movq(Address(base, offset), Immediate(v_32)); + } else { + __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); + } break; } @@ -3283,8 +3407,7 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt( - 1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { @@ -3423,7 +3546,7 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { 1, Location::RegisterOrConstant(instruction->InputAt(1))); locations->SetInAt(2, Location::RequiresRegister()); if (value_type == Primitive::kPrimLong) { - locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(2, 
Location::RegisterOrInt32LongConstant(instruction->InputAt(2))); } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) { locations->SetInAt(2, Location::RequiresFpuRegister()); } else { @@ -3511,8 +3634,8 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movl(Address(obj, offset), value.AsRegister<CpuRegister>()); } else { DCHECK(value.IsConstant()) << value; - __ movl(Address(obj, offset), - Immediate(value.GetConstant()->AsIntConstant()->GetValue())); + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movl(Address(obj, offset), Immediate(v)); } } else { DCHECK(index.IsRegister()) << index; @@ -3521,8 +3644,9 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { value.AsRegister<CpuRegister>()); } else { DCHECK(value.IsConstant()) << value; + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), - Immediate(value.GetConstant()->AsIntConstant()->GetValue())); + Immediate(v)); } } codegen_->MaybeRecordImplicitNullCheck(instruction); @@ -3546,12 +3670,25 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - DCHECK(value.IsRegister()); - __ movq(Address(obj, offset), value.AsRegister<CpuRegister>()); + if (value.IsRegister()) { + __ movq(Address(obj, offset), value.AsRegister<CpuRegister>()); + } else { + int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); + DCHECK(IsInt<32>(v)); + int32_t v_32 = v; + __ movq(Address(obj, offset), Immediate(v_32)); + } } else { - DCHECK(value.IsRegister()); - __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), - value.AsRegister<CpuRegister>()); + if (value.IsRegister()) { + __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), + value.AsRegister<CpuRegister>()); + } else { + int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); + DCHECK(IsInt<32>(v)); + int32_t v_32 = v; + __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), + Immediate(v_32)); + } } codegen_->MaybeRecordImplicitNullCheck(instruction); break; @@ -4044,9 +4181,11 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { SlowPathCodeX86_64* slow_path = nullptr; // Return 0 if `obj` is null. - // TODO: avoid this check if we know obj is not null. - __ testl(obj, obj); - __ j(kEqual, &zero); + // Avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ testl(obj, obj); + __ j(kEqual, &zero); + } // Compare the class of `obj` with `cls`. 
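
The long-typed constant stores above are only possible because x86-64's movq to memory takes a sign-extended 32-bit immediate, hence the DCHECK(IsInt<32>(v)) before truncating and the RegisterOrInt32LongConstant location constraint. A hedged standalone sketch of that range check (FitsInInt32 is an illustrative name, not ART's IsInt<32> helper):

#include <cassert>
#include <cstdint>

// True iff v fits the sign-extending 32-bit immediate form that
// x86-64 movq-to-memory requires.
static bool FitsInInt32(std::int64_t v) {
  return v >= INT32_MIN && v <= INT32_MAX;
}

int main() {
  assert(FitsInInt32(-1));                      // sign-extends from 0xFFFFFFFF
  assert(FitsInInt32(INT64_C(0x7FFFFFFF)));
  assert(!FitsInInt32(INT64_C(0x100000000)));   // needs a register instead
  return 0;
}
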
__ movl(out, Address(obj, class_offset)); if (cls.IsRegister()) { @@ -4070,8 +4209,12 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ movl(out, Immediate(1)); __ jmp(&done); } - __ Bind(&zero); - __ movl(out, Immediate(0)); + + if (instruction->MustDoNullCheck() || instruction->IsClassFinal()) { + __ Bind(&zero); + __ movl(out, Immediate(0)); + } + if (slow_path != nullptr) { __ Bind(slow_path->GetExitLabel()); } @@ -4096,9 +4239,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); - // TODO: avoid this check if we know obj is not null. - __ testl(obj, obj); - __ j(kEqual, slow_path->GetExitLabel()); + // Avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ testl(obj, obj); + __ j(kEqual, slow_path->GetExitLabel()); + } // Compare the class of `obj` with `cls`. __ movl(temp, Address(obj, class_offset)); if (cls.IsRegister()) { @@ -4137,13 +4282,7 @@ void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instructio DCHECK(instruction->GetResultType() == Primitive::kPrimInt || instruction->GetResultType() == Primitive::kPrimLong); locations->SetInAt(0, Location::RequiresRegister()); - if (instruction->GetType() == Primitive::kPrimInt) { - locations->SetInAt(1, Location::Any()); - } else { - // We can handle 32 bit constants. - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1))); - } + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); } @@ -4204,25 +4343,43 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in if (second.IsConstant()) { second_is_constant = true; value = second.GetConstant()->AsLongConstant()->GetValue(); - DCHECK(IsInt<32>(value)); } + bool is_int32_value = IsInt<32>(value); if (instruction->IsAnd()) { if (second_is_constant) { - __ andq(first_reg, Immediate(static_cast<int32_t>(value))); + if (is_int32_value) { + __ andq(first_reg, Immediate(static_cast<int32_t>(value))); + } else { + __ andq(first_reg, codegen_->LiteralInt64Address(value)); + } + } else if (second.IsDoubleStackSlot()) { + __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); } else { __ andq(first_reg, second.AsRegister<CpuRegister>()); } } else if (instruction->IsOr()) { if (second_is_constant) { - __ orq(first_reg, Immediate(static_cast<int32_t>(value))); + if (is_int32_value) { + __ orq(first_reg, Immediate(static_cast<int32_t>(value))); + } else { + __ orq(first_reg, codegen_->LiteralInt64Address(value)); + } + } else if (second.IsDoubleStackSlot()) { + __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); } else { __ orq(first_reg, second.AsRegister<CpuRegister>()); } } else { DCHECK(instruction->IsXor()); if (second_is_constant) { - __ xorq(first_reg, Immediate(static_cast<int32_t>(value))); + if (is_int32_value) { + __ xorq(first_reg, Immediate(static_cast<int32_t>(value))); + } else { + __ xorq(first_reg, codegen_->LiteralInt64Address(value)); + } + } else if (second.IsDoubleStackSlot()) { + __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); } else { __ xorq(first_reg, second.AsRegister<CpuRegister>()); } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index b4876ef161..6cdc82262c 100644 --- 
a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -102,10 +102,10 @@ class SlowPathCodeX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64); }; -class ParallelMoveResolverX86_64 : public ParallelMoveResolver { +class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap { public: ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen) - : ParallelMoveResolver(allocator), codegen_(codegen) {} + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} void EmitMove(size_t index) OVERRIDE; void EmitSwap(size_t index) OVERRIDE; diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index fc3dd01ef5..91cd60acce 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -20,10 +20,78 @@ namespace art { -void HDeadCodeElimination::Run() { +static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) { + int block_id = block->GetBlockId(); + if (visited->IsBitSet(block_id)) { + return; + } + visited->SetBit(block_id); + + HInstruction* last_instruction = block->GetLastInstruction(); + if (last_instruction->IsIf()) { + HIf* if_instruction = last_instruction->AsIf(); + HInstruction* condition = if_instruction->InputAt(0); + if (!condition->IsIntConstant()) { + MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited); + MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited); + } else if (condition->AsIntConstant()->IsOne()) { + MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited); + } else { + DCHECK(condition->AsIntConstant()->IsZero()); + MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited); + } + } else { + for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) { + MarkReachableBlocks(block->GetSuccessors().Get(i), visited); + } + } +} + +void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) { + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction, + block->GetPhis().CountSize() + block->GetInstructions().CountSize()); + } +} + +void HDeadCodeElimination::RemoveDeadBlocks() { + // Classify blocks as reachable/unreachable. + ArenaAllocator* allocator = graph_->GetArena(); + ArenaBitVector live_blocks(allocator, graph_->GetBlocks().Size(), false); + MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks); + + // Remove all dead blocks. Process blocks in post-order, because removal needs + // the block's chain of dominators. + for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + if (live_blocks.IsBitSet(block->GetBlockId())) { + continue; + } + MaybeRecordDeadBlock(block); + block->DisconnectAndDelete(); + } + + // Connect successive blocks created by dead branches. Order does not matter. + for (HReversePostOrderIterator it(*graph_); !it.Done();) { + HBasicBlock* block = it.Current(); + if (block->IsEntryBlock() || block->GetSuccessors().Size() != 1u) { + it.Advance(); + continue; + } + HBasicBlock* successor = block->GetSuccessors().Get(0); + if (successor->IsExitBlock() || successor->GetPredecessors().Size() != 1u) { + it.Advance(); + continue; + } + block->MergeWith(successor); + + // Reiterate on this block in case it can be merged with its new successor. 
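
MarkReachableBlocks above is a depth-first walk from the entry block that follows only the taken successor when an HIf condition is a constant; blocks left unmarked are then disconnected and deleted. A simplified, self-contained model of the walk (generic Block type and MarkReachable function, not ART's HGraph):

#include <cstddef>
#include <cstdio>
#include <vector>

struct Block {
  std::vector<int> successors;
  int constant_condition;  // -1: not a constant HIf; 1: true branch taken; 0: false branch taken
};

static void MarkReachable(const std::vector<Block>& cfg, int id, std::vector<bool>* visited) {
  if ((*visited)[id]) return;
  (*visited)[id] = true;
  const Block& b = cfg[id];
  if (b.constant_condition != -1 && b.successors.size() == 2) {
    // Only the successor actually taken stays live (successors[0] is the true branch).
    MarkReachable(cfg, b.successors[b.constant_condition ? 0 : 1], visited);
  } else {
    for (int s : b.successors) MarkReachable(cfg, s, visited);
  }
}

int main() {
  // Block 0 ends with if(false) -> {1, 2}; both branches reach block 3.
  std::vector<Block> cfg(4);
  cfg[0].successors = {1, 2}; cfg[0].constant_condition = 0;
  cfg[1].successors = {3};    cfg[1].constant_condition = -1;
  cfg[2].successors = {3};    cfg[2].constant_condition = -1;
  cfg[3].constant_condition = -1;
  std::vector<bool> visited(cfg.size(), false);
  MarkReachable(cfg, 0, &visited);
  for (std::size_t i = 0; i < cfg.size(); ++i) {
    std::printf("block %zu: %s\n", i, visited[i] ? "live" : "dead");  // block 1 is dead
  }
  return 0;
}
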
+ } +} + +void HDeadCodeElimination::RemoveDeadInstructions() { // Process basic blocks in post-order in the dominator tree, so that - // a dead instruction depending on another dead instruction is - // removed. + // a dead instruction depending on another dead instruction is removed. for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) { HBasicBlock* block = b.Current(); // Traverse this block's instructions in backward order and remove @@ -38,11 +106,18 @@ void HDeadCodeElimination::Run() { if (!inst->HasSideEffects() && !inst->CanThrow() && !inst->IsSuspendCheck() + && !inst->IsMemoryBarrier() // If we added an explicit barrier then we should keep it. && !inst->HasUses()) { block->RemoveInstruction(inst); + MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction); } } } } +void HDeadCodeElimination::Run() { + RemoveDeadBlocks(); + RemoveDeadInstructions(); +} + } // namespace art diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 3db2c3ff3f..0bea0fc1c2 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -19,6 +19,7 @@ #include "nodes.h" #include "optimization.h" +#include "optimizing_compiler_stats.h" namespace art { @@ -28,8 +29,10 @@ namespace art { */ class HDeadCodeElimination : public HOptimization { public: - explicit HDeadCodeElimination(HGraph* graph) - : HOptimization(graph, true, kDeadCodeEliminationPassName) {} + HDeadCodeElimination(HGraph* graph, + OptimizingCompilerStats* stats = nullptr, + const char* name = kDeadCodeEliminationPassName) + : HOptimization(graph, true, name, stats) {} void Run() OVERRIDE; @@ -37,6 +40,10 @@ class HDeadCodeElimination : public HOptimization { "dead_code_elimination"; private: + void MaybeRecordDeadBlock(HBasicBlock* block); + void RemoveDeadBlocks(); + void RemoveDeadInstructions(); + DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination); }; diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 3a56c6c68f..e1649fd3cd 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -88,23 +88,36 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { // Visit this block's list of phis. for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* current = it.Current(); // Ensure this block's list of phis contains only phis. - if (!it.Current()->IsPhi()) { + if (!current->IsPhi()) { AddError(StringPrintf("Block %d has a non-phi in its phi list.", current_block_->GetBlockId())); } - it.Current()->Accept(this); + if (current->GetNext() == nullptr && current != block->GetLastPhi()) { + AddError(StringPrintf("The recorded last phi of block %d does not match " + "the actual last phi %d.", + current_block_->GetBlockId(), + current->GetId())); + } + current->Accept(this); } // Visit this block's list of instructions. - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); - it.Advance()) { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* current = it.Current(); // Ensure this block's list of instructions does not contains phis. 
- if (it.Current()->IsPhi()) { + if (current->IsPhi()) { AddError(StringPrintf("Block %d has a phi in its non-phi list.", current_block_->GetBlockId())); } - it.Current()->Accept(this); + if (current->GetNext() == nullptr && current != block->GetLastInstruction()) { + AddError(StringPrintf("The recorded last instruction of block %d does not match " + "the actual last instruction %d.", + current_block_->GetBlockId(), + current->GetId())); + } + current->Accept(this); } } @@ -251,6 +264,8 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { } } + const ArenaBitVector& loop_blocks = loop_header->GetLoopInformation()->GetBlocks(); + // Ensure there is only one back edge per loop. size_t num_back_edges = loop_header->GetLoopInformation()->GetBackEdges().Size(); @@ -263,19 +278,41 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { "Loop defined by header %d has several back edges: %zu.", id, num_back_edges)); + } else { + DCHECK_EQ(num_back_edges, 1u); + int back_edge_id = loop_header->GetLoopInformation()->GetBackEdges().Get(0)->GetBlockId(); + if (!loop_blocks.IsBitSet(back_edge_id)) { + AddError(StringPrintf( + "Loop defined by header %d has an invalid back edge %d.", + id, + back_edge_id)); + } } - // Ensure all blocks in the loop are dominated by the loop header. - const ArenaBitVector& loop_blocks = - loop_header->GetLoopInformation()->GetBlocks(); + // Ensure all blocks in the loop are live and dominated by the loop header. for (uint32_t i : loop_blocks.Indexes()) { HBasicBlock* loop_block = GetGraph()->GetBlocks().Get(i); - if (!loop_header->Dominates(loop_block)) { + if (loop_block == nullptr) { + AddError(StringPrintf("Loop defined by header %d contains a previously removed block %d.", + id, + i)); + } else if (!loop_header->Dominates(loop_block)) { AddError(StringPrintf("Loop block %d not dominated by loop header %d.", - loop_block->GetBlockId(), + i, id)); } } + + // If this is a nested loop, ensure the outer loops contain a superset of the blocks. + for (HLoopInformationOutwardIterator it(*loop_header); !it.Done(); it.Advance()) { + HLoopInformation* outer_info = it.Current(); + if (!loop_blocks.IsSubsetOf(&outer_info->GetBlocks())) { + AddError(StringPrintf("Blocks of loop defined by header %d are not a subset of blocks of " + "an outer loop defined by header %d.", + id, + outer_info->GetHeader()->GetBlockId())); + } + } } void SSAChecker::VisitInstruction(HInstruction* instruction) { @@ -393,8 +430,10 @@ void SSAChecker::HandleBooleanInput(HInstruction* instruction, size_t input_inde static_cast<int>(input_index), value)); } - } else if (input->GetType() == Primitive::kPrimInt && input->IsPhi()) { - // TODO: We need a data-flow analysis which determines if the Phi is boolean. + } else if (input->GetType() == Primitive::kPrimInt + && (input->IsPhi() || input->IsAnd() || input->IsOr() || input->IsXor())) { + // TODO: We need a data-flow analysis to determine if the Phi or + // binary operation is actually Boolean. Allow for now. 
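
The nested-loop check added above reduces to a bit-vector subset test: every block id recorded for the inner loop must also be set in each outer loop's block set. The same predicate in standalone C++, with std::bitset standing in for ArenaBitVector:

#include <bitset>
#include <cassert>
#include <cstddef>

// inner is a subset of outer iff (inner & ~outer) has no bit set.
template <std::size_t N>
static bool IsSubsetOf(const std::bitset<N>& inner, const std::bitset<N>& outer) {
  return (inner & ~outer).none();
}

int main() {
  std::bitset<8> outer_loop("01111110");  // blocks 1..6
  std::bitset<8> inner_loop("00011100");  // blocks 2..4
  assert(IsSubsetOf(inner_loop, outer_loop));
  inner_loop.set(7);                      // a block the outer loop does not contain
  assert(!IsSubsetOf(inner_loop, outer_loop));
  return 0;
}
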
} else if (input->GetType() != Primitive::kPrimBoolean) { AddError(StringPrintf( "%s instruction %d has a non-Boolean input %d whose type is: %s.", diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 4c283788b5..ca9cbc3d01 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -192,6 +192,10 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << " " << phi->GetRegNumber(); } + void VisitMemoryBarrier(HMemoryBarrier* barrier) OVERRIDE { + output_ << " " << barrier->GetBarrierKind(); + } + bool IsPass(const char* name) { return strcmp(pass_name_, name) == 0; } diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 74848d5d96..708733e28c 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -55,7 +55,7 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { buckets_owned_(allocator, num_buckets_, false), num_entries_(to_copy.num_entries_) { // ArenaAllocator returns zeroed memory, so entries of buckets_ and - // buckets_owned_ are initialized to nullptr and false, respectively. + // buckets_owned_ are initialized to null and false, respectively. DCHECK(IsPowerOfTwo(num_buckets_)); if (num_buckets_ == to_copy.num_buckets_) { // Hash table remains the same size. We copy the bucket pointers and leave diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 6d2a8d77e2..bffd639e83 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -190,7 +190,7 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, } // Run simple optimizations on the graph. - HDeadCodeElimination dce(callee_graph); + HDeadCodeElimination dce(callee_graph, stats_); HConstantFolding fold(callee_graph); InstructionSimplifier simplify(callee_graph, stats_); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index afbc490150..2df7c166d8 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -43,6 +43,8 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE; void VisitEqual(HEqual* equal) OVERRIDE; + void VisitNotEqual(HNotEqual* equal) OVERRIDE; + void VisitBooleanNot(HBooleanNot* bool_not) OVERRIDE; void VisitArraySet(HArraySet* equal) OVERRIDE; void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; void VisitNullCheck(HNullCheck* instruction) OVERRIDE; @@ -60,6 +62,7 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitSub(HSub* instruction) OVERRIDE; void VisitUShr(HUShr* instruction) OVERRIDE; void VisitXor(HXor* instruction) OVERRIDE; + void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; @@ -87,10 +90,6 @@ void InstructionSimplifierVisitor::Run() { // current index, so don't advance the iterator. 
continue; } - if (simplifications_at_current_position_ >= kMaxSamePositionSimplifications) { - LOG(WARNING) << "Too many simplifications (" << simplifications_at_current_position_ - << ") occurred at the current position."; - } simplifications_at_current_position_ = 0; it.Advance(); } @@ -161,6 +160,10 @@ void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); + if (!check_cast->InputAt(0)->CanBeNull()) { + check_cast->ClearMustDoNullCheck(); + } + if (!load_class->IsResolved()) { // If the class couldn't be resolve it's not safe to compare against it. It's // default type would be Top which might be wider that the actual class type @@ -178,6 +181,12 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { } } +void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { + if (!instruction->InputAt(0)->CanBeNull()) { + instruction->ClearMustDoNullCheck(); + } +} + void InstructionSimplifierVisitor::VisitSuspendCheck(HSuspendCheck* check) { HBasicBlock* block = check->GetBlock(); // Currently always keep the suspend check at entry. @@ -195,21 +204,62 @@ void InstructionSimplifierVisitor::VisitSuspendCheck(HSuspendCheck* check) { } void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { - HInstruction* input1 = equal->InputAt(0); - HInstruction* input2 = equal->InputAt(1); - if (input1->GetType() == Primitive::kPrimBoolean && input2->IsIntConstant()) { - if (input2->AsIntConstant()->GetValue() == 1) { - // Replace (bool_value == 1) with bool_value - equal->ReplaceWith(equal->InputAt(0)); - equal->GetBlock()->RemoveInstruction(equal); - } else { - // We should replace (bool_value == 0) with !bool_value, but we unfortunately - // do not have such instruction. 
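
The rewritten simplifier below replaces comparisons of a Boolean value against a constant with either the value itself or a new HBooleanNot, and VisitBooleanNot folds double negation. A tiny exhaustive check of the identities being relied on:

#include <cassert>

// Identities used when folding comparisons of Boolean values against constants:
//   (b == 1) -> b      (b == 0) -> !b
//   (b != 1) -> !b     (b != 0) -> b       !(!b) -> b
int main() {
  for (int b = 0; b <= 1; ++b) {
    assert((b == 1) == b);
    assert((b == 0) == !b);
    assert((b != 1) == !b);
    assert((b != 0) == b);
    assert(!!b == b);
  }
  return 0;
}
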
- DCHECK_EQ(input2->AsIntConstant()->GetValue(), 0); + HInstruction* input_const = equal->GetConstantRight(); + if (input_const != nullptr) { + HInstruction* input_value = equal->GetLeastConstantLeft(); + if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) { + HBasicBlock* block = equal->GetBlock(); + if (input_const->AsIntConstant()->IsOne()) { + // Replace (bool_value == true) with bool_value + equal->ReplaceWith(input_value); + block->RemoveInstruction(equal); + RecordSimplification(); + } else { + // Replace (bool_value == false) with !bool_value + DCHECK(input_const->AsIntConstant()->IsZero()); + block->ReplaceAndRemoveInstructionWith( + equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); + RecordSimplification(); + } + } + } +} + +void InstructionSimplifierVisitor::VisitNotEqual(HNotEqual* not_equal) { + HInstruction* input_const = not_equal->GetConstantRight(); + if (input_const != nullptr) { + HInstruction* input_value = not_equal->GetLeastConstantLeft(); + if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) { + HBasicBlock* block = not_equal->GetBlock(); + if (input_const->AsIntConstant()->IsOne()) { + // Replace (bool_value != true) with !bool_value + block->ReplaceAndRemoveInstructionWith( + not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); + RecordSimplification(); + } else { + // Replace (bool_value != false) with bool_value + DCHECK(input_const->AsIntConstant()->IsZero()); + not_equal->ReplaceWith(input_value); + block->RemoveInstruction(not_equal); + RecordSimplification(); + } } } } +void InstructionSimplifierVisitor::VisitBooleanNot(HBooleanNot* bool_not) { + HInstruction* parent = bool_not->InputAt(0); + if (parent->IsBooleanNot()) { + HInstruction* value = parent->InputAt(0); + // Replace (!(!bool_value)) with bool_value + bool_not->ReplaceWith(value); + bool_not->GetBlock()->RemoveInstruction(bool_not); + // It is possible that `parent` is dead at this point but we leave + // its removal to DCE for simplicity. + RecordSimplification(); + } +} + void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) { HInstruction* input = instruction->InputAt(0); // If the array is a NewArray with constant size, replace the array length @@ -388,9 +438,16 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { if (Primitive::IsIntOrLongType(type)) { int64_t factor = Int64FromConstant(input_cst); - // We expect the `0` case to have been handled in the constant folding pass. - DCHECK_NE(factor, 0); - if (IsPowerOfTwo(factor)) { + // Even though constant propagation also takes care of the zero case, other + // optimizations can lead to having a zero multiplication. + if (factor == 0) { + // Replace code looking like + // MUL dst, src, 0 + // with + // 0 + instruction->ReplaceWith(input_cst); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (IsPowerOfTwo(factor)) { // Replace code looking like // MUL dst, src, pow_of_2 // with @@ -424,7 +481,8 @@ void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) { return; } - if (input->IsSub() && input->HasOnlyOneNonEnvironmentUse()) { + if (input->IsSub() && input->HasOnlyOneNonEnvironmentUse() && + !Primitive::IsFloatingPointType(input->GetType())) { // Replace code looking like // SUB tmp, a, b // NEG dst, tmp @@ -435,6 +493,7 @@ void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) { // worse code. 
In particular, we do not want the live ranges of `a` and `b` // to be extended if we are not sure the initial 'SUB' instruction can be // removed. + // We do not perform optimization for fp because we could lose the sign of zero. HSub* sub = input->AsSub(); HSub* new_sub = new (GetGraph()->GetArena()) HSub(instruction->GetType(), sub->GetRight(), sub->GetLeft()); diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 9a6062fedf..932192e4fd 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -863,7 +863,7 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); // Note that the null check must have been done earlier. - DCHECK(!invoke->CanDoImplicitNullCheck()); + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); Register argument = locations->InAt(1).AsRegister<Register>(); __ cmp(argument, ShifterOperand(0)); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index d3a4e6ca15..117d6a4279 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1007,7 +1007,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); // Note that the null check must have been done earlier. - DCHECK(!invoke->CanDoImplicitNullCheck()); + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); Register argument = WRegisterFrom(locations->InAt(1)); __ Cmp(argument, 0); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 3c7a2660db..a8e2cdf1f6 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -828,7 +828,7 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); return; @@ -962,7 +962,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); // Note that the null check must have been done earlier. - DCHECK(!invoke->CanDoImplicitNullCheck()); + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); Register argument = locations->InAt(1).AsRegister<Register>(); __ testl(argument, argument); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index d9a1c31c77..5d24d1fbfb 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -704,7 +704,6 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); return; } @@ -732,14 +731,12 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int. 
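
The comment above gives the reason the NEG(SUB(a, b)) to SUB(b, a) rewrite is restricted to integral types: for floating point the two forms can differ in the sign of zero. A short standalone demonstration:

#include <cassert>
#include <cmath>

int main() {
  double a = 1.0, b = 1.0;
  // -(a - b) is -0.0 while (b - a) is +0.0: equal as values, different in sign,
  // which is why the rewrite is only performed for integral types.
  assert(std::signbit(-(a - b)));
  assert(!std::signbit(b - a));
  assert(-(a - b) == (b - a));
  return 0;
}
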
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; X86_64Assembler* assembler = GetAssembler(); - // Generate 0.5 into inPlusPointFive. - __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f))); - __ movd(inPlusPointFive, out, false); + // Load 0.5 into inPlusPointFive. + __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f)); // Add in the input. __ addss(inPlusPointFive, in); @@ -747,12 +744,8 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { // And truncate to an integer. __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); - __ movl(out, Immediate(kPrimIntMax)); - // maxInt = int-to-float(out) - __ cvtsi2ss(maxInt, out); - // if inPlusPointFive >= maxInt goto done - __ comiss(inPlusPointFive, maxInt); + __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax))); __ j(kAboveEqual, &done); // if input == NaN goto nan @@ -782,14 +775,12 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { // Implement RoundDouble as t1 = floor(input + 0.5); convert to long. XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; X86_64Assembler* assembler = GetAssembler(); - // Generate 0.5 into inPlusPointFive. - __ movq(out, Immediate(bit_cast<int64_t, double>(0.5))); - __ movd(inPlusPointFive, out, true); + // Load 0.5 into inPlusPointFive. + __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5)); // Add in the input. __ addsd(inPlusPointFive, in); @@ -797,12 +788,8 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { // And truncate to an integer. __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1)); - __ movq(out, Immediate(kPrimLongMax)); - // maxLong = long-to-double(out) - __ cvtsi2sd(maxLong, out, true); - // if inPlusPointFive >= maxLong goto done - __ comisd(inPlusPointFive, maxLong); + __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax))); __ j(kAboveEqual, &done); // if input == NaN goto nan @@ -886,7 +873,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); // Note that the null check must have been done earlier. 
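
A portable model of the round-float semantics implemented above (Java's Math.round): NaN yields 0, values at or above kPrimIntMax saturate, otherwise floor(in + 0.5) is converted to int. This is only a sketch; the negative-overflow handling of the real intrinsic is not modelled here.

#include <cassert>
#include <cmath>
#include <cstdint>

static std::int32_t RoundFloat(float in) {
  const std::int32_t kPrimIntMax = INT32_MAX;
  if (std::isnan(in)) {
    return 0;
  }
  float plus_point_five = in + 0.5f;
  if (plus_point_five >= static_cast<float>(kPrimIntMax)) {
    return kPrimIntMax;
  }
  return static_cast<std::int32_t>(std::floor(plus_point_five));
}

int main() {
  assert(RoundFloat(2.5f) == 3);
  assert(RoundFloat(-2.5f) == -2);          // rounds half up, like Math.round
  assert(RoundFloat(std::nanf("")) == 0);
  assert(RoundFloat(1e20f) == INT32_MAX);
  return 0;
}
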
- DCHECK(!invoke->CanDoImplicitNullCheck()); + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>(); __ testl(argument, argument); @@ -960,26 +947,48 @@ static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1))); } static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); + Location value = locations->InAt(1); // x86 allows unaligned access. We do not have to check the input or use specific instructions // to avoid a SIGBUS. switch (size) { case Primitive::kPrimByte: - __ movb(Address(address, 0), value); + if (value.IsConstant()) { + __ movb(Address(address, 0), + Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); + } else { + __ movb(Address(address, 0), value.AsRegister<CpuRegister>()); + } break; case Primitive::kPrimShort: - __ movw(Address(address, 0), value); + if (value.IsConstant()) { + __ movw(Address(address, 0), + Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); + } else { + __ movw(Address(address, 0), value.AsRegister<CpuRegister>()); + } break; case Primitive::kPrimInt: - __ movl(Address(address, 0), value); + if (value.IsConstant()) { + __ movl(Address(address, 0), + Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); + } else { + __ movl(Address(address, 0), value.AsRegister<CpuRegister>()); + } break; case Primitive::kPrimLong: - __ movq(Address(address, 0), value); + if (value.IsConstant()) { + int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); + DCHECK(IsInt<32>(v)); + int32_t v_32 = v; + __ movq(Address(address, 0), Immediate(v_32)); + } else { + __ movq(Address(address, 0), value.AsRegister<CpuRegister>()); + } break; default: LOG(FATAL) << "Type not recognized for poke: " << size; diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index de876be9ab..c3a99150c4 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -285,17 +285,26 @@ class Location : public ValueObject { bool Contains(Location other) const { if (Equals(other)) { return true; - } else if (IsFpuRegisterPair() && other.IsFpuRegister()) { - return other.reg() == low() || other.reg() == high(); - } else if (IsRegisterPair() && other.IsRegister()) { - return other.reg() == low() || other.reg() == high(); - } else if (IsDoubleStackSlot() && other.IsStackSlot()) { - return (GetStackIndex() == other.GetStackIndex()) - || (GetStackIndex() + 4 == other.GetStackIndex()); + } else if (IsPair() || IsDoubleStackSlot()) { + return ToLow().Equals(other) || ToHigh().Equals(other); } return false; } + bool OverlapsWith(Location other) const { + // Only check the overlapping case that can happen with our register allocation algorithm. + bool overlap = Contains(other) || other.Contains(*this); + if (kIsDebugBuild && !overlap) { + // Note: These are also overlapping cases. But we are not able to handle them in + // ParallelMoveResolverWithSwap. Make sure that we do not meet such case with our compiler. 
+ if ((IsPair() && other.IsPair()) || (IsDoubleStackSlot() && other.IsDoubleStackSlot())) { + DCHECK(!Contains(other.ToLow())); + DCHECK(!Contains(other.ToHigh())); + } + } + return overlap; + } + const char* DebugString() const { switch (GetKind()) { case kInvalid: return "I"; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 5fca4fab22..3205b5e991 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -416,26 +416,6 @@ static void UpdateInputsUsers(HInstruction* instruction) { DCHECK(!instruction->HasEnvironment()); } -void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) { - DCHECK(!cursor->IsPhi()); - DCHECK(!instruction->IsPhi()); - DCHECK_EQ(instruction->GetId(), -1); - DCHECK_NE(cursor->GetId(), -1); - DCHECK_EQ(cursor->GetBlock(), this); - DCHECK(!instruction->IsControlFlow()); - instruction->next_ = cursor; - instruction->previous_ = cursor->previous_; - cursor->previous_ = instruction; - if (GetFirstInstruction() == cursor) { - instructions_.first_instruction_ = instruction; - } else { - instruction->previous_->next_ = instruction; - } - instruction->SetBlock(this); - instruction->SetId(GetGraph()->GetNextInstructionId()); - UpdateInputsUsers(instruction); -} - void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement) { DCHECK(initial->GetBlock() == this); @@ -463,23 +443,27 @@ void HBasicBlock::AddPhi(HPhi* phi) { Add(&phis_, this, phi); } +void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) { + DCHECK(!cursor->IsPhi()); + DCHECK(!instruction->IsPhi()); + DCHECK_EQ(instruction->GetId(), -1); + DCHECK_NE(cursor->GetId(), -1); + DCHECK_EQ(cursor->GetBlock(), this); + DCHECK(!instruction->IsControlFlow()); + instruction->SetBlock(this); + instruction->SetId(GetGraph()->GetNextInstructionId()); + UpdateInputsUsers(instruction); + instructions_.InsertInstructionBefore(instruction, cursor); +} + void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) { DCHECK_EQ(phi->GetId(), -1); DCHECK_NE(cursor->GetId(), -1); DCHECK_EQ(cursor->GetBlock(), this); - if (cursor->next_ == nullptr) { - cursor->next_ = phi; - phi->previous_ = cursor; - DCHECK(phi->next_ == nullptr); - } else { - phi->next_ = cursor->next_; - phi->previous_ = cursor; - cursor->next_ = phi; - phi->next_->previous_ = phi; - } phi->SetBlock(this); phi->SetId(GetGraph()->GetNextInstructionId()); UpdateInputsUsers(phi); + phis_.InsertInstructionAfter(phi, cursor); } static void Remove(HInstructionList* instruction_list, @@ -546,6 +530,34 @@ void HInstructionList::AddInstruction(HInstruction* instruction) { } } +void HInstructionList::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) { + DCHECK(Contains(cursor)); + if (cursor == first_instruction_) { + cursor->previous_ = instruction; + instruction->next_ = cursor; + first_instruction_ = instruction; + } else { + instruction->previous_ = cursor->previous_; + instruction->next_ = cursor; + cursor->previous_ = instruction; + instruction->previous_->next_ = instruction; + } +} + +void HInstructionList::InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor) { + DCHECK(Contains(cursor)); + if (cursor == last_instruction_) { + cursor->next_ = instruction; + instruction->previous_ = cursor; + last_instruction_ = instruction; + } else { + instruction->next_ = cursor->next_; + instruction->previous_ = cursor; + cursor->next_ = instruction; + instruction->next_->previous_ = 
instruction; + } +} + void HInstructionList::RemoveInstruction(HInstruction* instruction) { if (instruction->previous_ != nullptr) { instruction->previous_->next_ = instruction->next_; @@ -660,6 +672,11 @@ void HPhi::AddInput(HInstruction* input) { input->AddUseAt(this, inputs_.Size() - 1); } +void HPhi::RemoveInputAt(size_t index) { + RemoveAsUserOfInput(index); + inputs_.DeleteAt(index); +} + #define DEFINE_ACCEPT(name, super) \ void H##name::Accept(HGraphVisitor* visitor) { \ visitor->Visit##name(this); \ @@ -702,7 +719,7 @@ HConstant* HUnaryOperation::TryStaticEvaluation() const { // TODO: Implement static evaluation of long unary operations. // // Do not exit with a fatal condition here. Instead, simply - // return `nullptr' to notify the caller that this instruction + // return `null' to notify the caller that this instruction // cannot (yet) be statically evaluated. return nullptr; } @@ -738,7 +755,7 @@ HConstant* HBinaryOperation::GetConstantRight() const { } // If `GetConstantRight()` returns one of the input, this returns the other -// one. Otherwise it returns nullptr. +// one. Otherwise it returns null. HInstruction* HBinaryOperation::GetLeastConstantLeft() const { HInstruction* most_constant_right = GetConstantRight(); if (most_constant_right == nullptr) { @@ -855,6 +872,15 @@ bool HBasicBlock::HasSinglePhi() const { return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr; } +size_t HInstructionList::CountSize() const { + size_t size = 0; + HInstruction* current = first_instruction_; + for (; current != nullptr; current = current->GetNext()) { + size++; + } + return size; +} + void HInstructionList::SetBlockOfInstructions(HBasicBlock* block) const { for (HInstruction* current = first_instruction_; current != nullptr; @@ -886,40 +912,167 @@ void HInstructionList::Add(const HInstructionList& instruction_list) { } } -void HBasicBlock::DisconnectFromAll() { - DCHECK(dominated_blocks_.IsEmpty()) << "Unimplemented scenario"; +void HBasicBlock::DisconnectAndDelete() { + // Dominators must be removed after all the blocks they dominate. This way + // a loop header is removed last, a requirement for correct loop information + // iteration. + DCHECK(dominated_blocks_.IsEmpty()); + + // Remove the block from all loops it is included in. + for (HLoopInformationOutwardIterator it(*this); !it.Done(); it.Advance()) { + HLoopInformation* loop_info = it.Current(); + loop_info->Remove(this); + if (loop_info->IsBackEdge(*this)) { + // This deliberately leaves the loop in an inconsistent state and will + // fail SSAChecker unless the entire loop is removed during the pass. + loop_info->RemoveBackEdge(this); + } + } + // Disconnect the block from its predecessors and update their control-flow + // instructions. for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) { - predecessors_.Get(i)->successors_.Delete(this); + HBasicBlock* predecessor = predecessors_.Get(i); + HInstruction* last_instruction = predecessor->GetLastInstruction(); + predecessor->RemoveInstruction(last_instruction); + predecessor->RemoveSuccessor(this); + if (predecessor->GetSuccessors().Size() == 1u) { + DCHECK(last_instruction->IsIf()); + predecessor->AddInstruction(new (graph_->GetArena()) HGoto()); + } else { + // The predecessor has no remaining successors and therefore must be dead. + // We deliberately leave it without a control-flow instruction so that the + // SSAChecker fails unless it is not removed during the pass too. 
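
The InsertInstructionBefore/InsertInstructionAfter helpers moved into HInstructionList a bit further up are plain doubly-linked-list insertions that also maintain the list's first/last pointers. A self-contained toy version (generic Node/List types, not HInstruction):

#include <cassert>

struct Node {
  int id;
  Node* prev;
  Node* next;
};

struct List {
  Node* first = nullptr;
  Node* last = nullptr;

  void InsertBefore(Node* n, Node* cursor) {
    n->next = cursor;
    n->prev = cursor->prev;
    if (cursor == first) {
      first = n;
    } else {
      cursor->prev->next = n;
    }
    cursor->prev = n;
  }

  void InsertAfter(Node* n, Node* cursor) {
    n->prev = cursor;
    n->next = cursor->next;
    if (cursor == last) {
      last = n;
    } else {
      cursor->next->prev = n;
    }
    cursor->next = n;
  }
};

int main() {
  Node a{1, nullptr, nullptr}, b{2, nullptr, nullptr}, c{3, nullptr, nullptr};
  List list;
  list.first = list.last = &b;
  list.InsertBefore(&a, &b);  // a, b
  list.InsertAfter(&c, &b);   // a, b, c
  assert(list.first == &a && list.last == &c && a.next == &b && b.next == &c);
  return 0;
}
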
+ DCHECK_EQ(predecessor->GetSuccessors().Size(), 0u); + } } + predecessors_.Reset(); + + // Disconnect the block from its successors and update their dominators + // and phis. for (size_t i = 0, e = successors_.Size(); i < e; ++i) { - successors_.Get(i)->predecessors_.Delete(this); - } - dominator_->dominated_blocks_.Delete(this); + HBasicBlock* successor = successors_.Get(i); + // Delete this block from the list of predecessors. + size_t this_index = successor->GetPredecessorIndexOf(this); + successor->predecessors_.DeleteAt(this_index); + + // Check that `successor` has other predecessors, otherwise `this` is the + // dominator of `successor` which violates the order DCHECKed at the top. + DCHECK(!successor->predecessors_.IsEmpty()); + + // Recompute the successor's dominator. + HBasicBlock* old_dominator = successor->GetDominator(); + HBasicBlock* new_dominator = successor->predecessors_.Get(0); + for (size_t j = 1, f = successor->predecessors_.Size(); j < f; ++j) { + new_dominator = graph_->FindCommonDominator( + new_dominator, successor->predecessors_.Get(j)); + } + if (old_dominator != new_dominator) { + successor->SetDominator(new_dominator); + old_dominator->RemoveDominatedBlock(successor); + new_dominator->AddDominatedBlock(successor); + } - predecessors_.Reset(); + // Remove this block's entries in the successor's phis. + if (successor->predecessors_.Size() == 1u) { + // The successor has just one predecessor left. Replace phis with the only + // remaining input. + for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + HPhi* phi = phi_it.Current()->AsPhi(); + phi->ReplaceWith(phi->InputAt(1 - this_index)); + successor->RemovePhi(phi); + } + } else { + for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + phi_it.Current()->AsPhi()->RemoveInputAt(this_index); + } + } + } successors_.Reset(); - dominator_ = nullptr; - graph_ = nullptr; + + // Disconnect from the dominator. + dominator_->RemoveDominatedBlock(this); + SetDominator(nullptr); + + // Delete from the graph. The function safely deletes remaining instructions + // and updates the reverse post order. + graph_->DeleteDeadBlock(this); + SetGraph(nullptr); } void HBasicBlock::MergeWith(HBasicBlock* other) { - DCHECK(successors_.IsEmpty()) << "Unimplemented block merge scenario"; - DCHECK(dominated_blocks_.IsEmpty() - || (dominated_blocks_.Size() == 1 && dominated_blocks_.Get(0) == other)) - << "Unimplemented block merge scenario"; + DCHECK_EQ(GetGraph(), other->GetGraph()); + DCHECK(GetDominatedBlocks().Contains(other)); + DCHECK_EQ(GetSuccessors().Size(), 1u); + DCHECK_EQ(GetSuccessors().Get(0), other); + DCHECK_EQ(other->GetPredecessors().Size(), 1u); + DCHECK_EQ(other->GetPredecessors().Get(0), this); DCHECK(other->GetPhis().IsEmpty()); + // Move instructions from `other` to `this`. + DCHECK(EndsWithControlFlowInstruction()); + RemoveInstruction(GetLastInstruction()); + instructions_.Add(other->GetInstructions()); + other->instructions_.SetBlockOfInstructions(this); + other->instructions_.Clear(); + + // Remove `other` from the loops it is included in. + for (HLoopInformationOutwardIterator it(*other); !it.Done(); it.Advance()) { + HLoopInformation* loop_info = it.Current(); + loop_info->Remove(other); + if (loop_info->IsBackEdge(*other)) { + loop_info->ClearBackEdges(); + loop_info->AddBackEdge(this); + } + } + + // Update links to the successors of `other`. 
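
When DisconnectAndDelete above removes this block from a successor's predecessor list, each phi in that successor drops the input coming from the removed edge, and a phi left with a single input is replaced by that input. A toy model of just that bookkeeping (plain vectors, not HPhi):

#include <cassert>
#include <cstddef>
#include <vector>

// A phi keeps one input per predecessor, in predecessor order. When an edge is
// removed, the matching input goes away; with a single input left, the phi
// simply stands for that value.
struct ToyPhi {
  std::vector<int> inputs;
};

// Returns the replacement value if the phi degenerated, or -1 if it is still a phi.
static int RemovePredecessorInput(ToyPhi* phi, std::size_t removed_pred_index) {
  phi->inputs.erase(phi->inputs.begin() + static_cast<std::ptrdiff_t>(removed_pred_index));
  return phi->inputs.size() == 1u ? phi->inputs[0] : -1;
}

int main() {
  ToyPhi phi;
  phi.inputs = {10, 20};                      // values from predecessors #0 and #1
  assert(RemovePredecessorInput(&phi, 0) == 20);
  return 0;
}
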
successors_.Reset(); - dominated_blocks_.Reset(); + while (!other->successors_.IsEmpty()) { + HBasicBlock* successor = other->successors_.Get(0); + successor->ReplacePredecessor(other, this); + } + + // Update the dominator tree. + dominated_blocks_.Delete(other); + for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) { + HBasicBlock* dominated = other->GetDominatedBlocks().Get(i); + dominated_blocks_.Add(dominated); + dominated->SetDominator(this); + } + other->dominated_blocks_.Reset(); + other->dominator_ = nullptr; + + // Clear the list of predecessors of `other` in preparation of deleting it. + other->predecessors_.Reset(); + + // Delete `other` from the graph. The function updates reverse post order. + graph_->DeleteDeadBlock(other); + other->SetGraph(nullptr); +} + +void HBasicBlock::MergeWithInlined(HBasicBlock* other) { + DCHECK_NE(GetGraph(), other->GetGraph()); + DCHECK(GetDominatedBlocks().IsEmpty()); + DCHECK(GetSuccessors().IsEmpty()); + DCHECK(!EndsWithControlFlowInstruction()); + DCHECK_EQ(other->GetPredecessors().Size(), 1u); + DCHECK(other->GetPredecessors().Get(0)->IsEntryBlock()); + DCHECK(other->GetPhis().IsEmpty()); + DCHECK(!other->IsInLoop()); + + // Move instructions from `other` to `this`. instructions_.Add(other->GetInstructions()); - other->GetInstructions().SetBlockOfInstructions(this); + other->instructions_.SetBlockOfInstructions(this); - while (!other->GetSuccessors().IsEmpty()) { - HBasicBlock* successor = other->GetSuccessors().Get(0); + // Update links to the successors of `other`. + successors_.Reset(); + while (!other->successors_.IsEmpty()) { + HBasicBlock* successor = other->successors_.Get(0); successor->ReplacePredecessor(other, this); } + // Update the dominator tree. for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) { HBasicBlock* dominated = other->GetDominatedBlocks().Get(i); dominated_blocks_.Add(dominated); @@ -961,6 +1114,24 @@ static void MakeRoomFor(GrowableArray<HBasicBlock*>* blocks, } } +void HGraph::DeleteDeadBlock(HBasicBlock* block) { + DCHECK_EQ(block->GetGraph(), this); + DCHECK(block->GetSuccessors().IsEmpty()); + DCHECK(block->GetPredecessors().IsEmpty()); + DCHECK(block->GetDominatedBlocks().IsEmpty()); + DCHECK(block->GetDominator() == nullptr); + + for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + block->RemoveInstruction(it.Current()); + } + for (HBackwardInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + block->RemovePhi(it.Current()->AsPhi()); + } + + reverse_post_order_.Delete(block); + blocks_.Put(block->GetBlockId(), nullptr); +} + void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { if (GetBlocks().Size() == 3) { // Simple case of an entry block, a body block, and an exit block. 
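
MergeWith above assumes the two blocks are successive (single successor, single predecessor), appends the instructions and takes over the successors; the dominator-tree and loop-information updates of the real method are the part this toy version leaves out:

#include <cassert>
#include <vector>

struct ToyBlock {
  std::vector<int> instructions;
  std::vector<ToyBlock*> successors;
};

// `other` must be the single successor of `block`; its instructions are appended
// and its successors taken over, leaving `other` empty.
static void Merge(ToyBlock* block, ToyBlock* other) {
  assert(block->successors.size() == 1u && block->successors[0] == other);
  block->instructions.insert(block->instructions.end(),
                             other->instructions.begin(), other->instructions.end());
  block->successors = other->successors;
  other->instructions.clear();
  other->successors.clear();
}

int main() {
  ToyBlock a, b, c;
  a.instructions = {1, 2};
  b.instructions = {3};
  a.successors = {&b};
  b.successors = {&c};
  Merge(&a, &b);
  assert((a.instructions == std::vector<int>{1, 2, 3}));
  assert(a.successors.size() == 1u && a.successors[0] == &c);
  return 0;
}
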
@@ -993,7 +1164,7 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { HBasicBlock* first = entry_block_->GetSuccessors().Get(0); DCHECK(!first->IsInLoop()); - at->MergeWith(first); + at->MergeWithInlined(first); exit_block_->ReplaceWith(to); // Update all predecessors of the exit block (now the `to` block) @@ -1064,8 +1235,10 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { outer_graph->AddBlock(current); outer_graph->reverse_post_order_.Put(++index_of_at, current); if (info != nullptr) { - info->Add(current); current->SetLoopInformation(info); + for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { + loop_it.Current()->Add(current); + } } } } @@ -1075,8 +1248,10 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { outer_graph->AddBlock(to); outer_graph->reverse_post_order_.Put(++index_of_at, to); if (info != nullptr) { - info->Add(to); to->SetLoopInformation(info); + for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { + loop_it.Current()->Add(to); + } if (info->IsBackEdge(*at)) { // Only `at` can become a back edge, as the inlined blocks // are predecessors of `at`. @@ -1121,53 +1296,6 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { invoke->GetBlock()->RemoveInstruction(invoke); } -void HGraph::MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block) { - // Find the two branches of an If. - DCHECK_EQ(start_block->GetSuccessors().Size(), 2u); - HBasicBlock* left_branch = start_block->GetSuccessors().Get(0); - HBasicBlock* right_branch = start_block->GetSuccessors().Get(1); - - // Make sure this is a diamond control-flow path. - DCHECK_EQ(left_branch->GetSuccessors().Get(0), end_block); - DCHECK_EQ(right_branch->GetSuccessors().Get(0), end_block); - DCHECK_EQ(end_block->GetPredecessors().Size(), 2u); - DCHECK_EQ(start_block, end_block->GetDominator()); - - // Disconnect the branches and merge the two blocks. This will move - // all instructions from 'end_block' to 'start_block'. - DCHECK(left_branch->IsSingleGoto()); - DCHECK(right_branch->IsSingleGoto()); - left_branch->DisconnectFromAll(); - right_branch->DisconnectFromAll(); - start_block->RemoveInstruction(start_block->GetLastInstruction()); - start_block->MergeWith(end_block); - - // Delete the now redundant blocks from the graph. - blocks_.Put(left_branch->GetBlockId(), nullptr); - blocks_.Put(right_branch->GetBlockId(), nullptr); - blocks_.Put(end_block->GetBlockId(), nullptr); - - // Update reverse post order. - reverse_post_order_.Delete(left_branch); - reverse_post_order_.Delete(right_branch); - reverse_post_order_.Delete(end_block); - - // Update loops which contain the code. 
- for (HLoopInformationOutwardIterator it(*start_block); !it.Done(); it.Advance()) { - HLoopInformation* loop_info = it.Current(); - DCHECK(loop_info->Contains(*left_branch)); - DCHECK(loop_info->Contains(*right_branch)); - DCHECK(loop_info->Contains(*end_block)); - loop_info->Remove(left_branch); - loop_info->Remove(right_branch); - loop_info->Remove(end_block); - if (loop_info->IsBackEdge(*end_block)) { - loop_info->RemoveBackEdge(end_block); - loop_info->AddBackEdge(start_block); - } - } -} - std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) { ScopedObjectAccess soa(Thread::Current()); os << "[" diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 649038b532..18a8225f55 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -19,6 +19,7 @@ #include "base/arena_containers.h" #include "base/arena_object.h" +#include "dex/compiler_enums.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "handle.h" #include "handle_scope.h" @@ -74,6 +75,10 @@ class HInstructionList { void AddInstruction(HInstruction* instruction); void RemoveInstruction(HInstruction* instruction); + // Insert `instruction` before/after an existing instruction `cursor`. + void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); + void InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor); + // Return true if this list contains `instruction`. bool Contains(HInstruction* instruction) const; @@ -92,6 +97,9 @@ class HInstructionList { void AddAfter(HInstruction* cursor, const HInstructionList& instruction_list); void Add(const HInstructionList& instruction_list); + // Return the number of instructions in the list. This is an expensive operation. + size_t CountSize() const; + private: HInstruction* first_instruction_; HInstruction* last_instruction_; @@ -163,7 +171,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // Inline this graph in `outer_graph`, replacing the given `invoke` instruction. void InlineInto(HGraph* outer_graph, HInvoke* invoke); - void MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block); + // Removes `block` from the graph. 
+ void DeleteDeadBlock(HBasicBlock* block); void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor); void SimplifyLoop(HBasicBlock* header); @@ -243,8 +252,9 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { return CreateConstant(value, &cached_long_constants_); } - private: HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const; + + private: void VisitBlockForDominatorTree(HBasicBlock* block, HBasicBlock* predecessor, GrowableArray<size_t>* visits); @@ -446,6 +456,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { HBasicBlock* GetDominator() const { return dominator_; } void SetDominator(HBasicBlock* dominator) { dominator_ = dominator; } void AddDominatedBlock(HBasicBlock* block) { dominated_blocks_.Add(block); } + void RemoveDominatedBlock(HBasicBlock* block) { dominated_blocks_.Delete(block); } void ReplaceDominatedBlock(HBasicBlock* existing, HBasicBlock* new_block) { for (size_t i = 0, e = dominated_blocks_.Size(); i < e; ++i) { if (dominated_blocks_.Get(i) == existing) { @@ -466,8 +477,9 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { HInstruction* GetFirstInstruction() const { return instructions_.first_instruction_; } HInstruction* GetLastInstruction() const { return instructions_.last_instruction_; } const HInstructionList& GetInstructions() const { return instructions_; } - const HInstructionList& GetPhis() const { return phis_; } HInstruction* GetFirstPhi() const { return phis_.first_instruction_; } + HInstruction* GetLastPhi() const { return phis_.last_instruction_; } + const HInstructionList& GetPhis() const { return phis_; } void AddSuccessor(HBasicBlock* block) { successors_.Add(block); @@ -544,7 +556,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { // that this method does not update the graph, reverse post order, loop // information, nor make sure the blocks are consistent (for example ending // with a control flow instruction). - void MergeWith(HBasicBlock* other); + void MergeWithInlined(HBasicBlock* other); // Replace `this` with `other`. Predecessors, successors, and dominated blocks // of `this` are moved to `other`. @@ -553,12 +565,17 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { // with a control flow instruction). void ReplaceWith(HBasicBlock* other); - // Disconnects `this` from all its predecessors, successors and the dominator. - // It assumes that `this` does not dominate any blocks. - // Note that this method does not update the graph, reverse post order, loop - // information, nor make sure the blocks are consistent (for example ending - // with a control flow instruction). - void DisconnectFromAll(); + // Merge `other` at the end of `this`. This method updates loops, reverse post + // order, links to predecessors, successors, dominators and deletes the block + // from the graph. The two blocks must be successive, i.e. `this` the only + // predecessor of `other` and vice versa. + void MergeWith(HBasicBlock* other); + + // Disconnects `this` from all its predecessors, successors and dominator, + // removes it from all loops it is included in and eventually from the graph. + // The block must not dominate any other block. Predecessors and successors + // are safely updated. 
+ void DisconnectAndDelete(); void AddInstruction(HInstruction* instruction); void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); @@ -718,6 +735,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(LoadString, Instruction) \ M(Local, Instruction) \ M(LongConstant, Constant) \ + M(MemoryBarrier, Instruction) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ M(Neg, UnaryOperation) \ @@ -908,6 +926,12 @@ class HUserRecord : public ValueObject { HUseListNode<T>* use_node_; }; +// TODO: Add better documentation to this class and maybe refactor with more suggestive names. +// - Has(All)SideEffects suggests that all the side effects are present but only ChangesSomething +// flag is consider. +// - DependsOn suggests that there is a real dependency between side effects but it only +// checks DependendsOnSomething flag. +// // Represents the side effects an instruction may have. class SideEffects : public ValueObject { public: @@ -1141,8 +1165,6 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { virtual bool CanThrow() const { return false; } bool HasSideEffects() const { return side_effects_.HasSideEffects(); } - virtual bool ActAsNullConstant() const { return false; } - // Does not apply for all instructions, but having this at top level greatly // simplifies the null check elimination. virtual bool CanBeNull() const { @@ -1150,7 +1172,10 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { return true; } - virtual bool CanDoImplicitNullCheck() const { return false; } + virtual bool CanDoImplicitNullCheckOn(HInstruction* obj) const { + UNUSED(obj); + return false; + } void SetReferenceTypeInfo(ReferenceTypeInfo reference_type_info) { DCHECK_EQ(GetType(), Primitive::kPrimNot); @@ -1618,7 +1643,7 @@ class HUnaryOperation : public HExpression<1> { // Try to statically evaluate `operation` and return a HConstant // containing the result of this evaluation. If `operation` cannot - // be evaluated as a constant, return nullptr. + // be evaluated as a constant, return null. HConstant* TryStaticEvaluation() const; // Apply this operation to `x`. @@ -1686,7 +1711,7 @@ class HBinaryOperation : public HExpression<2> { // Try to statically evaluate `operation` and return a HConstant // containing the result of this evaluation. If `operation` cannot - // be evaluated as a constant, return nullptr. + // be evaluated as a constant, return null. HConstant* TryStaticEvaluation() const; // Apply this operation to `x` and `y`. @@ -1694,11 +1719,11 @@ class HBinaryOperation : public HExpression<2> { virtual int64_t Evaluate(int64_t x, int64_t y) const = 0; // Returns an input that can legally be used as the right input and is - // constant, or nullptr. + // constant, or null. HConstant* GetConstantRight() const; // If `GetConstantRight()` returns one of the input, this returns the other - // one. Otherwise it returns nullptr. + // one. Otherwise it returns null. HInstruction* GetLeastConstantLeft() const; DECLARE_INSTRUCTION(BinaryOperation); @@ -2064,8 +2089,6 @@ class HNullConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return 0; } - bool ActAsNullConstant() const OVERRIDE { return true; } - DECLARE_INSTRUCTION(NullConstant); private: @@ -2087,11 +2110,6 @@ class HIntConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return GetValue(); } - // TODO: Null is represented by the `0` constant. In most cases we replace it - // with a HNullConstant but we don't do it when comparing (a != null). 
This - // method is an workaround until we fix the above. - bool ActAsNullConstant() const OVERRIDE { return value_ == 0; } - bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } bool IsZero() const OVERRIDE { return GetValue() == 0; } bool IsOne() const OVERRIDE { return GetValue() == 1; } @@ -2105,7 +2123,7 @@ class HIntConstant : public HConstant { friend class HGraph; ART_FRIEND_TEST(GraphTest, InsertInstructionBefore); - ART_FRIEND_TEST(ParallelMoveTest, ConstantLast); + ART_FRIEND_TYPED_TEST(ParallelMoveTest, ConstantLast); DISALLOW_COPY_AND_ASSIGN(HIntConstant); }; @@ -2162,7 +2180,7 @@ class HInvoke : public HInstruction { uint32_t GetDexMethodIndex() const { return dex_method_index_; } - Intrinsics GetIntrinsic() { + Intrinsics GetIntrinsic() const { return intrinsic_; } @@ -2217,7 +2235,8 @@ class HInvokeStaticOrDirect : public HInvoke { invoke_type_(invoke_type), is_recursive_(is_recursive) {} - bool CanDoImplicitNullCheck() const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + UNUSED(obj); // We access the method via the dex cache so we can't do an implicit null check. // TODO: for intrinsics we can generate implicit null checks. return false; @@ -2249,9 +2268,9 @@ class HInvokeVirtual : public HInvoke { : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), vtable_index_(vtable_index) {} - bool CanDoImplicitNullCheck() const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { // TODO: Add implicit null checks in intrinsics. - return !GetLocations()->Intrinsified(); + return (obj == InputAt(0)) && !GetLocations()->Intrinsified(); } uint32_t GetVTableIndex() const { return vtable_index_; } @@ -2275,9 +2294,9 @@ class HInvokeInterface : public HInvoke { : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), imt_index_(imt_index) {} - bool CanDoImplicitNullCheck() const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { // TODO: Add implicit null checks in intrinsics. 
- return !GetLocations()->Intrinsified(); + return (obj == InputAt(0)) && !GetLocations()->Intrinsified(); } uint32_t GetImtIndex() const { return imt_index_; } @@ -2738,6 +2757,7 @@ class HPhi : public HInstruction { size_t InputCount() const OVERRIDE { return inputs_.Size(); } void AddInput(HInstruction* input); + void RemoveInputAt(size_t index); Primitive::Type GetType() const OVERRIDE { return type_; } void SetType(Primitive::Type type) { type_ = type; } @@ -2847,8 +2867,8 @@ class HInstanceFieldGet : public HExpression<1> { return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } - bool CanDoImplicitNullCheck() const OVERRIDE { - return GetFieldOffset().Uint32Value() < kPageSize; + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize; } size_t ComputeHashCode() const OVERRIDE { @@ -2881,8 +2901,8 @@ class HInstanceFieldSet : public HTemplateInstruction<2> { SetRawInputAt(1, value); } - bool CanDoImplicitNullCheck() const OVERRIDE { - return GetFieldOffset().Uint32Value() < kPageSize; + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize; } const FieldInfo& GetFieldInfo() const { return field_info_; } @@ -2912,7 +2932,8 @@ class HArrayGet : public HExpression<2> { UNUSED(other); return true; } - bool CanDoImplicitNullCheck() const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + UNUSED(obj); // TODO: We can be smarter here. // Currently, the array access is always preceded by an ArrayLength or a NullCheck // which generates the implicit null check. There are cases when these can be removed @@ -2954,7 +2975,8 @@ class HArraySet : public HTemplateInstruction<3> { return needs_type_check_; } - bool CanDoImplicitNullCheck() const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + UNUSED(obj); // TODO: Same as for ArrayGet. return false; } @@ -3006,7 +3028,9 @@ class HArrayLength : public HExpression<1> { UNUSED(other); return true; } - bool CanDoImplicitNullCheck() const OVERRIDE { return true; } + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + return obj == InputAt(0); + } DECLARE_INSTRUCTION(ArrayLength); @@ -3343,6 +3367,7 @@ class HInstanceOf : public HExpression<2> { uint32_t dex_pc) : HExpression(Primitive::kPrimBoolean, SideEffects::None()), class_is_final_(class_is_final), + must_do_null_check_(true), dex_pc_(dex_pc) { SetRawInputAt(0, object); SetRawInputAt(1, constant); @@ -3362,10 +3387,15 @@ class HInstanceOf : public HExpression<2> { bool IsClassFinal() const { return class_is_final_; } + // Used only in code generation. 
+ bool MustDoNullCheck() const { return must_do_null_check_; } + void ClearMustDoNullCheck() { must_do_null_check_ = false; } + DECLARE_INSTRUCTION(InstanceOf); private: const bool class_is_final_; + bool must_do_null_check_; const uint32_t dex_pc_; DISALLOW_COPY_AND_ASSIGN(HInstanceOf); @@ -3406,6 +3436,7 @@ class HCheckCast : public HTemplateInstruction<2> { uint32_t dex_pc) : HTemplateInstruction(SideEffects::None()), class_is_final_(class_is_final), + must_do_null_check_(true), dex_pc_(dex_pc) { SetRawInputAt(0, object); SetRawInputAt(1, constant); @@ -3424,6 +3455,9 @@ class HCheckCast : public HTemplateInstruction<2> { bool CanThrow() const OVERRIDE { return true; } + bool MustDoNullCheck() const { return must_do_null_check_; } + void ClearMustDoNullCheck() { must_do_null_check_ = false; } + uint32_t GetDexPc() const { return dex_pc_; } bool IsClassFinal() const { return class_is_final_; } @@ -3432,11 +3466,28 @@ class HCheckCast : public HTemplateInstruction<2> { private: const bool class_is_final_; + bool must_do_null_check_; const uint32_t dex_pc_; DISALLOW_COPY_AND_ASSIGN(HCheckCast); }; +class HMemoryBarrier : public HTemplateInstruction<0> { + public: + explicit HMemoryBarrier(MemBarrierKind barrier_kind) + : HTemplateInstruction(SideEffects::None()), + barrier_kind_(barrier_kind) {} + + MemBarrierKind GetBarrierKind() { return barrier_kind_; } + + DECLARE_INSTRUCTION(MemoryBarrier); + + private: + const MemBarrierKind barrier_kind_; + + DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier); +}; + class HMonitorOperation : public HTemplateInstruction<1> { public: enum OperationKind { @@ -3502,7 +3553,7 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> { // True if this blocks a move from the given location. bool Blocks(Location loc) const { - return !IsEliminated() && (source_.Contains(loc) || loc.Contains(source_)); + return !IsEliminated() && source_.OverlapsWith(loc); } // A move is redundant if it's been eliminated, if its source and @@ -3571,8 +3622,8 @@ class HParallelMove : public HTemplateInstruction<0> { } } for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK(!destination.Equals(moves_.Get(i).GetDestination())) - << "Same destination for two moves in a parallel move."; + DCHECK(!destination.OverlapsWith(moves_.Get(i).GetDestination())) + << "Overlapped destination for two moves in a parallel move."; } } moves_.Add(MoveOperands(source, destination, type, instruction)); diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index b13e07eb22..c46a21955c 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -21,9 +21,9 @@ namespace art { -void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat) const { +void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const { if (stats_ != nullptr) { - stats_->RecordStat(compilation_stat); + stats_->RecordStat(compilation_stat, count); } } diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index 8b2028177b..ccf8de9f6a 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -48,7 +48,7 @@ class HOptimization : public ValueObject { void Check(); protected: - void MaybeRecordStat(MethodCompilationStat compilation_stat) const; + void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const; HGraph* const graph_; // Used to record stats about the optimization. 
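The new `count` parameter on HOptimization::MaybeRecordStat() lets a pass report a batch of events with a single call. A minimal sketch, assuming only the APIs introduced in this change (MaybeRecordStat with a count, HInstructionList::CountSize(), HGraph::DeleteDeadBlock() and the kRemovedDeadInstruction stat); the pass and method names below are hypothetical:

// Hypothetical helper on a pass derived from HOptimization.
void MyDeadCodePass::RemoveDeadBlock(HBasicBlock* block) {
  // CountSize() walks the lists, so compute the totals before the block is deleted.
  size_t removed =
      block->GetPhis().CountSize() + block->GetInstructions().CountSize();
  graph_->DeleteDeadBlock(block);
  // One stat update for the whole block instead of one call per removed instruction.
  MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction, removed);
}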
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index a95696a468..05451bcaa6 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -208,6 +208,12 @@ class OptimizingCompiler FINAL : public Compiler { void UnInit() const OVERRIDE; + void MaybeRecordStat(MethodCompilationStat compilation_stat) const { + if (compilation_stats_.get() != nullptr) { + compilation_stats_->RecordStat(compilation_stat); + } + } + private: // Whether we should run any optimization or register allocation. If false, will // just run the code generation after the graph was built. @@ -226,7 +232,7 @@ class OptimizingCompiler FINAL : public Compiler { CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit) const; - mutable OptimizingCompilerStats compilation_stats_; + std::unique_ptr<OptimizingCompilerStats> compilation_stats_; std::unique_ptr<std::ostream> visualizer_output_; @@ -243,7 +249,6 @@ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) run_optimizations_( (driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) && !driver->GetCompilerOptions().GetDebuggable()), - compilation_stats_(), delegate_(Create(driver, Compiler::Kind::kQuick)) {} void OptimizingCompiler::Init() { @@ -258,6 +263,9 @@ void OptimizingCompiler::Init() { << "Invoke the compiler with '-j1'."; visualizer_output_.reset(new std::ofstream(cfg_file_name)); } + if (driver->GetDumpStats()) { + compilation_stats_.reset(new OptimizingCompilerStats()); + } } void OptimizingCompiler::UnInit() const { @@ -265,7 +273,9 @@ void OptimizingCompiler::UnInit() const { } OptimizingCompiler::~OptimizingCompiler() { - compilation_stats_.Log(); + if (compilation_stats_.get() != nullptr) { + compilation_stats_->Log(); + } } void OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu) const { @@ -310,10 +320,11 @@ static void RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { - HDeadCodeElimination dce(graph); + HDeadCodeElimination dce1(graph, stats); + HDeadCodeElimination dce2(graph, stats, "dead_code_elimination_final"); HConstantFolding fold1(graph); InstructionSimplifier simplify1(graph, stats); - HBooleanSimplifier boolean_not(graph); + HBooleanSimplifier boolean_simplify(graph); HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, stats); @@ -329,20 +340,21 @@ static void RunOptimizations(HGraph* graph, HOptimization* optimizations[] = { &intrinsics, - &dce, + &dce1, &fold1, &simplify1, + &inliner, // BooleanSimplifier depends on the InstructionSimplifier removing redundant // suspend checks to recognize empty blocks. 
- &boolean_not, - &inliner, + &boolean_simplify, &fold2, &side_effects, &gvn, &licm, &bce, &type_propagation, - &simplify2 + &simplify2, + &dce2, }; RunOptimizations(optimizations, arraysize(optimizations), pass_info_printer); @@ -381,7 +393,7 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer) const { StackHandleScopeCollection handles(Thread::Current()); - RunOptimizations(graph, compiler_driver, &compilation_stats_, + RunOptimizations(graph, compiler_driver, compilation_stats_.get(), dex_file, dex_compilation_unit, pass_info_printer, &handles); AllocateRegisters(graph, codegen, pass_info_printer); @@ -397,7 +409,7 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, std::vector<uint8_t> stack_map; codegen->BuildStackMaps(&stack_map); - compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized); + MaybeRecordStat(MethodCompilationStat::kCompiledOptimized); return CompiledMethod::SwapAllocCompiledMethod( compiler_driver, @@ -435,7 +447,7 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( std::vector<uint8_t> gc_map; codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); - compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline); + MaybeRecordStat(MethodCompilationStat::kCompiledBaseline); return CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), @@ -463,7 +475,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite const DexFile& dex_file) const { UNUSED(invoke_type); std::string method_name = PrettyMethod(method_idx, dex_file); - compilation_stats_.RecordStat(MethodCompilationStat::kAttemptCompilation); + MaybeRecordStat(MethodCompilationStat::kAttemptCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); InstructionSet instruction_set = compiler_driver->GetInstructionSet(); // Always use the thumb2 assembler: some runtime functionality (like implicit stack @@ -474,12 +486,12 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite // Do not attempt to compile on architectures we do not support. 
if (!IsInstructionSetSupported(instruction_set)) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledUnsupportedIsa); + MaybeRecordStat(MethodCompilationStat::kNotCompiledUnsupportedIsa); return nullptr; } if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledPathological); + MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological); return nullptr; } @@ -489,7 +501,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions(); if ((compiler_options.GetCompilerFilter() == CompilerOptions::kSpace) && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledSpaceFilter); + MaybeRecordStat(MethodCompilationStat::kNotCompiledSpaceFilter); return nullptr; } @@ -514,7 +526,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite compiler_driver->GetCompilerOptions())); if (codegen.get() == nullptr) { CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler"; - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen); + MaybeRecordStat(MethodCompilationStat::kNotCompiledNoCodegen); return nullptr; } codegen->GetAssembler()->cfi().SetEnabled( @@ -531,7 +543,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite &dex_compilation_unit, &dex_file, compiler_driver, - &compilation_stats_); + compilation_stats_.get()); VLOG(compiler) << "Building " << method_name; @@ -558,7 +570,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite if (!graph->TryBuildingSsa()) { // We could not transform the graph to SSA, bailout. 
LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop"; - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); + MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); return nullptr; } } @@ -576,11 +588,11 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite VLOG(compiler) << "Compile baseline " << method_name; if (!run_optimizations_) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedDisabled); + MaybeRecordStat(MethodCompilationStat::kNotOptimizedDisabled); } else if (!can_optimize) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedTryCatch); + MaybeRecordStat(MethodCompilationStat::kNotOptimizedTryCatch); } else if (!can_allocate_registers) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator); + MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator); } return CompileBaseline(codegen.get(), compiler_driver, dex_compilation_unit); @@ -603,9 +615,9 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, method_idx, jclass_loader, dex_file); } else { if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime); + MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime); } else { - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledClassNotVerified); + MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified); } } @@ -616,7 +628,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, jclass_loader, dex_file); if (method != nullptr) { - compilation_stats_.RecordStat(MethodCompilationStat::kCompiledQuick); + MaybeRecordStat(MethodCompilationStat::kCompiledQuick); } return method; } diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index d4a936d1c3..65c84e6942 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -29,6 +29,7 @@ enum MethodCompilationStat { kCompiledBaseline, kCompiledOptimized, kCompiledQuick, + kInstructionSimplifications, kInlinedInvoke, kNotCompiledUnsupportedIsa, kNotCompiledPathological, @@ -48,8 +49,8 @@ enum MethodCompilationStat { kNotCompiledVerifyAtRuntime, kNotCompiledClassNotVerified, kRemovedCheckedCast, + kRemovedDeadInstruction, kRemovedNullCheck, - kInstructionSimplifications, kLastStat }; @@ -57,8 +58,8 @@ class OptimizingCompilerStats { public: OptimizingCompilerStats() {} - void RecordStat(MethodCompilationStat stat) { - compile_stats_[stat]++; + void RecordStat(MethodCompilationStat stat, size_t count = 1) { + compile_stats_[stat] += count; } void Log() const { @@ -82,7 +83,7 @@ class OptimizingCompilerStats { for (int i = 0; i < kLastStat; i++) { if (compile_stats_[i] != 0) { - VLOG(compiler) << PrintMethodCompilationStat(i) << ": " << compile_stats_[i]; + LOG(INFO) << PrintMethodCompilationStat(i) << ": " << compile_stats_[i]; } } } @@ -96,6 +97,7 @@ class OptimizingCompilerStats { case kCompiledOptimized : return "kCompiledOptimized"; case kCompiledQuick : return "kCompiledQuick"; case kInlinedInvoke : return "kInlinedInvoke"; + case kInstructionSimplifications: return "kInstructionSimplifications"; case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; case kNotCompiledPathological : return "kNotCompiledPathological"; case 
kNotCompiledHugeMethod : return "kNotCompiledHugeMethod"; @@ -114,8 +116,8 @@ class OptimizingCompilerStats { case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime"; case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; case kRemovedCheckedCast: return "kRemovedCheckedCast"; + case kRemovedDeadInstruction: return "kRemovedDeadInstruction"; case kRemovedNullCheck: return "kRemovedNullCheck"; - case kInstructionSimplifications: return "kInstructionSimplifications"; default: LOG(FATAL) << "invalid stat"; } return ""; diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index ad92ca59a1..54ea6f19d4 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -17,11 +17,23 @@ #include "parallel_move_resolver.h" #include "nodes.h" -#include "locations.h" namespace art { -void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) { +void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { + // Perform a linear sweep of the moves to add them to the initial list of + // moves to perform, ignoring any move that is redundant (the source is + // the same as the destination, the destination is ignored and + // unallocated, or the move was already eliminated). + for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { + MoveOperands* move = parallel_move->MoveOperandsAt(i); + if (!move->IsRedundant()) { + moves_.Add(move); + } + } +} + +void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) { DCHECK(moves_.IsEmpty()); // Build up a worklist of moves. BuildInitialMoveList(parallel_move); @@ -50,20 +62,6 @@ void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) { moves_.Reset(); } - -void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { - // Perform a linear sweep of the moves to add them to the initial list of - // moves to perform, ignoring any move that is redundant (the source is - // the same as the destination, the destination is ignored and - // unallocated, or the move was already eliminated). - for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { - MoveOperands* move = parallel_move->MoveOperandsAt(i); - if (!move->IsRedundant()) { - moves_.Add(move); - } - } -} - Location LowOf(Location location) { if (location.IsRegisterPair()) { return Location::RegisterLocation(location.low()); @@ -103,7 +101,7 @@ static void UpdateSourceOf(MoveOperands* move, Location updated_location, Locati } } -MoveOperands* ParallelMoveResolver::PerformMove(size_t index) { +MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // Each call to this function performs a move and deletes it from the move // graph. We first recursively perform any move blocking this one. 
We // mark a move as "pending" on entry to PerformMove in order to detect @@ -229,7 +227,7 @@ MoveOperands* ParallelMoveResolver::PerformMove(size_t index) { } } -bool ParallelMoveResolver::IsScratchLocation(Location loc) { +bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) { for (size_t i = 0; i < moves_.Size(); ++i) { if (moves_.Get(i)->Blocks(loc)) { return false; @@ -245,10 +243,10 @@ bool ParallelMoveResolver::IsScratchLocation(Location loc) { return false; } -int ParallelMoveResolver::AllocateScratchRegister(int blocked, - int register_count, - int if_scratch, - bool* spilled) { +int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked, + int register_count, + int if_scratch, + bool* spilled) { DCHECK_NE(blocked, if_scratch); int scratch = -1; for (int reg = 0; reg < register_count; ++reg) { @@ -269,8 +267,8 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked, } -ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( - ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers) +ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope( + ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers) : resolver_(resolver), reg_(kNoRegister), spilled_(false) { @@ -282,10 +280,271 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( } -ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() { +ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() { if (spilled_) { resolver_->RestoreScratch(reg_); } } +void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { + DCHECK_EQ(GetNumberOfPendingMoves(), 0u); + DCHECK(moves_.IsEmpty()); + DCHECK(scratches_.IsEmpty()); + + // Backend dependent initialization. + PrepareForEmitNativeCode(); + + // Build up a worklist of moves. + BuildInitialMoveList(parallel_move); + + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& move = *moves_.Get(i); + // Skip constants to perform them last. They don't block other moves and + // skipping such moves with register destinations keeps those registers + // free for the whole algorithm. + if (!move.IsEliminated() && !move.GetSource().IsConstant()) { + PerformMove(i); + } + } + + // Perform the moves with constant sources and register destinations with UpdateMoveSource() + // to reduce the number of literal loads. Stack destinations are skipped since we won't be benefit + // from changing the constant sources to stack locations. + for (size_t i = 0; i < moves_.Size(); ++i) { + MoveOperands* move = moves_.Get(i); + Location destination = move->GetDestination(); + if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) { + Location source = move->GetSource(); + EmitMove(i); + move->Eliminate(); + // This may introduce additional instruction dependency, but reduce number + // of moves and possible literal loads. For example, + // Original moves: + // 1234.5678 -> D0 + // 1234.5678 -> D1 + // Updated moves: + // 1234.5678 -> D0 + // D0 -> D1 + UpdateMoveSource(source, destination); + } + } + + // Perform the rest of the moves. + for (size_t i = 0; i < moves_.Size(); ++i) { + MoveOperands* move = moves_.Get(i); + if (!move->IsEliminated()) { + EmitMove(i); + move->Eliminate(); + } + } + + // All pending moves that we have added for resolve cycles should be performed. + DCHECK_EQ(GetNumberOfPendingMoves(), 0u); + + // Backend dependent cleanup. 
+  FinishEmitNativeCode();
+
+  moves_.Reset();
+  scratches_.Reset();
+}
+
+Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
+  for (size_t i = 0; i < scratches_.Size(); ++i) {
+    Location loc = scratches_.Get(i);
+    if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
+      return loc;
+    }
+  }
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    Location loc = moves_.Get(i)->GetDestination();
+    if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
+      return loc;
+    }
+  }
+  return Location::NoLocation();
+}
+
+void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
+  if (kIsDebugBuild) {
+    for (size_t i = 0; i < scratches_.Size(); ++i) {
+      DCHECK(!loc.Equals(scratches_.Get(i)));
+    }
+  }
+  scratches_.Add(loc);
+}
+
+void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
+  DCHECK(!IsBlockedByMoves(loc));
+  for (size_t i = 0; i < scratches_.Size(); ++i) {
+    if (loc.Equals(scratches_.Get(i))) {
+      scratches_.DeleteAt(i);
+      break;
+    }
+  }
+}
+
+void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
+  // Each call to this function performs a move and deletes it from the move
+  // graph. We first recursively perform any move blocking this one. We mark
+  // a move as "pending" on entry to PerformMove in order to detect cycles
+  // in the move graph. We use a scratch location to resolve cycles, which
+  // may add further pending moves. After a move has been performed, we
+  // update its source operand in the move graph to reduce dependencies in
+  // the graph.
+
+  MoveOperands* move = moves_.Get(index);
+  DCHECK(!move->IsPending());
+  DCHECK(!move->IsEliminated());
+  if (move->IsRedundant()) {
+    // Previous operations on the list of moves have caused this particular move
+    // to become a no-op, so we can safely eliminate it. Consider for example
+    // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), which we
+    // will resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2'
+    // is used as the scratch location, the move (1 -> 2) will occur while
+    // resolving the cycle. When that move is emitted, the code will update moves
+    // with a '1' as their source to use '2' instead (see `UpdateMoveSource()`).
+    // In our example the initial move (1 -> 2) would then become the no-op
+    // (2 -> 2) that can be eliminated here.
+    move->Eliminate();
+    return;
+  }
+
+  // Clear this move's destination to indicate a pending move. The actual
+  // destination is saved in a stack-allocated local. Recursion may allow
+  // multiple moves to be pending.
+  DCHECK(!move->GetSource().IsInvalid());
+  Location destination = move->MarkPending();
+
+  // Perform a depth-first traversal of the move graph to resolve
+  // dependencies. Any unperformed, unpending move with a source the same
+  // as this one's destination blocks this one, so recursively perform all
+  // such moves.
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    const MoveOperands& other_move = *moves_.Get(i);
+    if (other_move.Blocks(destination) && !other_move.IsPending()) {
+      PerformMove(i);
+    }
+  }
+
+  // We are about to resolve this move and don't need it marked as
+  // pending, so restore its destination.
+  move->ClearPending(destination);
+
+  // No one else should write to the move destination while it is pending.
+  DCHECK(!move->IsRedundant());
+
+  Location source = move->GetSource();
+  // The move may be blocked on several pending moves, in case we have a cycle.
+ if (IsBlockedByMoves(destination)) { + // For a cycle like: (A -> B) (B -> C) (C -> A), we change it to following + // sequence: + // (C -> scratch) # Emit right now. + // (A -> B) (B -> C) # Unblocked. + // (scratch -> A) # Add to pending_moves_, blocked by (A -> B). + Location::Kind kind = source.GetKind(); + DCHECK_NE(kind, Location::kConstant); + Location scratch = AllocateScratchLocationFor(kind); + // We only care about the move size. + Primitive::Type type = move->Is64BitMove() ? Primitive::kPrimLong : Primitive::kPrimInt; + // Perform (C -> scratch) + move->SetDestination(scratch); + EmitMove(index); + move->Eliminate(); + UpdateMoveSource(source, scratch); + // Add (scratch -> A). + AddPendingMove(scratch, destination, type); + } else { + // This move is not blocked. + EmitMove(index); + move->Eliminate(); + UpdateMoveSource(source, destination); + } + + // Moves in the pending list should not block any other moves. But performing + // unblocked moves in the pending list can free scratch registers, so we do this + // as early as possible. + MoveOperands* pending_move; + while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) { + Location pending_source = pending_move->GetSource(); + Location pending_destination = pending_move->GetDestination(); + // We do not depend on the pending move index. So just delete the move instead + // of eliminating it to make the pending list cleaner. + DeletePendingMove(pending_move); + move->SetSource(pending_source); + move->SetDestination(pending_destination); + EmitMove(index); + move->Eliminate(); + UpdateMoveSource(pending_source, pending_destination); + // Free any unblocked locations in the scratch location list. + for (size_t i = 0; i < scratches_.Size(); ++i) { + Location scratch = scratches_.Get(i); + // Only scratch overlapping with performed move source can be unblocked. + if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) { + FreeScratchLocation(pending_source); + } + } + } +} + +void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { + // This function is used to reduce the dependencies in the graph after + // (from -> to) has been performed. Since we ensure there is no move with the same + // destination, (to -> X) can not be blocked while (from -> X) might still be + // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After + // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is + // a dependency between the two. If we update the source location from 1 to 2, we + // will get (0 -> 1) and (2 -> 3). There is no dependency between the two. + // + // This is not something we must do, but we can use fewer scratch locations with + // this trick. For example, we can avoid using additional scratch locations for + // moves (0 -> 1), (1 -> 2), (1 -> 0). 
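// Worked example (illustrative; it matches the no-swap expectation added in
// parallel_move_test.cc for the moves {{0, 1}, {1, 2}, {1, 0}}):
//   PerformMove(0 -> 1) first recursively performs its blocker (1 -> 2); once
//   that move is emitted, UpdateMoveSource(1, 2) rewrites the remaining
//   (1 -> 0) into (2 -> 0). Register 1 is then free, so (0 -> 1) is emitted
//   directly, and finally (2 -> 0). The emitted sequence is
//   (1 -> 2) (0 -> 1) (2 -> 0), with no scratch location needed.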
+ for (size_t i = 0; i < moves_.Size(); ++i) { + MoveOperands* move = moves_.Get(i); + if (move->GetSource().Equals(from)) { + move->SetSource(to); + } + } +} + +void ParallelMoveResolverNoSwap::AddPendingMove(Location source, + Location destination, Primitive::Type type) { + pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr)); +} + +void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) { + pending_moves_.Delete(move); +} + +MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) { + for (size_t i = 0; i < pending_moves_.Size(); ++i) { + MoveOperands* move = pending_moves_.Get(i); + Location destination = move->GetDestination(); + // Only moves with destination overlapping with input loc can be unblocked. + if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) { + return move; + } + } + return nullptr; +} + +bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) { + for (size_t i = 0; i < pending_moves_.Size(); ++i) { + if (pending_moves_.Get(i)->Blocks(loc)) { + return true; + } + } + for (size_t i = 0; i < moves_.Size(); ++i) { + if (moves_.Get(i)->Blocks(loc)) { + return true; + } + } + return false; +} + +// So far it is only used for debugging purposes to make sure all pending moves +// have been performed. +size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() { + return pending_moves_.Size(); +} + } // namespace art diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 95f8ad5b74..e89417df7d 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -19,30 +19,47 @@ #include "base/value_object.h" #include "utils/growable_array.h" +#include "locations.h" namespace art { class HParallelMove; -class Location; class MoveOperands; -/** - * Helper class to resolve a set of parallel moves. Architecture dependent code - * generator must have their own subclass that implements the `EmitMove` and `EmitSwap` - * operations. - */ +// Helper classes to resolve a set of parallel moves. Architecture dependent code generator must +// have their own subclass that implements corresponding virtual functions. class ParallelMoveResolver : public ValueObject { public: explicit ParallelMoveResolver(ArenaAllocator* allocator) : moves_(allocator, 32) {} virtual ~ParallelMoveResolver() {} // Resolve a set of parallel moves, emitting assembler instructions. - void EmitNativeCode(HParallelMove* parallel_move); + virtual void EmitNativeCode(HParallelMove* parallel_move) = 0; + + protected: + // Build the initial list of moves. + void BuildInitialMoveList(HParallelMove* parallel_move); + + GrowableArray<MoveOperands*> moves_; + + private: + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); +}; + +// This helper class uses swap to resolve dependencies and may emit swap. +class ParallelMoveResolverWithSwap : public ParallelMoveResolver { + public: + explicit ParallelMoveResolverWithSwap(ArenaAllocator* allocator) + : ParallelMoveResolver(allocator) {} + virtual ~ParallelMoveResolverWithSwap() {} + + // Resolve a set of parallel moves, emitting assembler instructions. 
+ void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE; protected: class ScratchRegisterScope : public ValueObject { public: - ScratchRegisterScope(ParallelMoveResolver* resolver, + ScratchRegisterScope(ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers); @@ -52,11 +69,12 @@ class ParallelMoveResolver : public ValueObject { bool IsSpilled() const { return spilled_; } private: - ParallelMoveResolver* resolver_; + ParallelMoveResolverWithSwap* resolver_; int reg_; bool spilled_; }; + // Return true if the location can be scratched. bool IsScratchLocation(Location loc); // Allocate a scratch register for performing a move. The method will try to use @@ -72,15 +90,9 @@ class ParallelMoveResolver : public ValueObject { virtual void SpillScratch(int reg) = 0; virtual void RestoreScratch(int reg) = 0; - // List of moves not yet resolved. - GrowableArray<MoveOperands*> moves_; - static constexpr int kNoRegister = -1; private: - // Build the initial list of moves. - void BuildInitialMoveList(HParallelMove* parallel_move); - // Perform the move at the moves_ index in question (possibly requiring // other moves to satisfy dependencies). // @@ -99,7 +111,83 @@ class ParallelMoveResolver : public ValueObject { // the right value. MoveOperands* PerformMove(size_t index); - DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverWithSwap); +}; + +// This helper class uses additional scratch registers to resolve dependencies. It supports all kind +// of dependency cycles and does not care about the register layout. +class ParallelMoveResolverNoSwap : public ParallelMoveResolver { + public: + explicit ParallelMoveResolverNoSwap(ArenaAllocator* allocator) + : ParallelMoveResolver(allocator), scratches_(allocator, 32), + pending_moves_(allocator, 8), allocator_(allocator) {} + virtual ~ParallelMoveResolverNoSwap() {} + + // Resolve a set of parallel moves, emitting assembler instructions. + void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE; + + protected: + // Called at the beginning of EmitNativeCode(). A subclass may put some architecture dependent + // initialization here. + virtual void PrepareForEmitNativeCode() = 0; + + // Called at the end of EmitNativeCode(). A subclass may put some architecture dependent cleanup + // here. All scratch locations will be removed after this call. + virtual void FinishEmitNativeCode() = 0; + + // Allocate a scratch location to perform a move from input kind of location. A subclass should + // implement this to get the best fit location. If there is no suitable physical register, it can + // also return a stack slot. + virtual Location AllocateScratchLocationFor(Location::Kind kind) = 0; + + // Called after a move which takes a scratch location as source. A subclass can defer the cleanup + // to FinishEmitNativeCode(). + virtual void FreeScratchLocation(Location loc) = 0; + + // Emit a move. + virtual void EmitMove(size_t index) = 0; + + // Return a scratch location from the moves which exactly matches the kind. + // Return Location::NoLocation() if no matching scratch location can be found. + Location GetScratchLocation(Location::Kind kind); + + // Add a location to the scratch list which can be returned from GetScratchLocation() to resolve + // dependency cycles. + void AddScratchLocation(Location loc); + + // Remove a location from the scratch list. + void RemoveScratchLocation(Location loc); + + // List of scratch locations. 
+ GrowableArray<Location> scratches_; + + private: + // Perform the move at the given index in `moves_` (possibly requiring other moves to satisfy + // dependencies). + void PerformMove(size_t index); + + void UpdateMoveSource(Location from, Location to); + + void AddPendingMove(Location source, Location destination, Primitive::Type type); + + void DeletePendingMove(MoveOperands* move); + + // Find a move that may be unblocked after (loc -> XXX) is performed. + MoveOperands* GetUnblockedPendingMove(Location loc); + + // Return true if the location is blocked by outstanding moves. + bool IsBlockedByMoves(Location loc); + + // Return the number of pending moves. + size_t GetNumberOfPendingMoves(); + + // Additional pending moves which might be added to resolve dependency cycle. + GrowableArray<MoveOperands*> pending_moves_; + + // Used to allocate pending MoveOperands. + ArenaAllocator* const allocator_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverNoSwap); }; } // namespace art diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 95cca5172b..f8f70105cf 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -19,27 +19,41 @@ #include "parallel_move_resolver.h" #include "gtest/gtest.h" +#include "gtest/gtest-typed-test.h" namespace art { -class TestParallelMoveResolver : public ParallelMoveResolver { - public: - explicit TestParallelMoveResolver(ArenaAllocator* allocator) : ParallelMoveResolver(allocator) {} - - void Dump(Location location) { - if (location.IsConstant()) { - message_ << "C"; - } else if (location.IsPair()) { - message_ << location.low() << "," << location.high(); - } else if (location.IsRegister()) { - message_ << location.reg(); - } else if (location.IsStackSlot()) { - message_ << location.GetStackIndex() << "(sp)"; - } else { - message_ << "2x" << location.GetStackIndex() << "(sp)"; - DCHECK(location.IsDoubleStackSlot()) << location; - } +constexpr int kScratchRegisterStartIndexForTest = 100; + +static void DumpRegisterForTest(std::ostream& os, int reg) { + if (reg >= kScratchRegisterStartIndexForTest) { + os << "T" << reg - kScratchRegisterStartIndexForTest; + } else { + os << reg; } +} + +static void DumpLocationForTest(std::ostream& os, Location location) { + if (location.IsConstant()) { + os << "C"; + } else if (location.IsPair()) { + DumpRegisterForTest(os, location.low()); + os << ","; + DumpRegisterForTest(os, location.high()); + } else if (location.IsRegister()) { + DumpRegisterForTest(os, location.reg()); + } else if (location.IsStackSlot()) { + os << location.GetStackIndex() << "(sp)"; + } else { + DCHECK(location.IsDoubleStackSlot())<< location; + os << "2x" << location.GetStackIndex() << "(sp)"; + } +} + +class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { + public: + explicit TestParallelMoveResolverWithSwap(ArenaAllocator* allocator) + : ParallelMoveResolverWithSwap(allocator) {} void EmitMove(size_t index) OVERRIDE { MoveOperands* move = moves_.Get(index); @@ -47,9 +61,9 @@ class TestParallelMoveResolver : public ParallelMoveResolver { message_ << " "; } message_ << "("; - Dump(move->GetSource()); + DumpLocationForTest(message_, move->GetSource()); message_ << " -> "; - Dump(move->GetDestination()); + DumpLocationForTest(message_, move->GetDestination()); message_ << ")"; } @@ -59,9 +73,9 @@ class TestParallelMoveResolver : public ParallelMoveResolver { message_ << " "; } message_ << "("; - Dump(move->GetSource()); + 
DumpLocationForTest(message_, move->GetSource()); message_ << " <-> "; - Dump(move->GetDestination()); + DumpLocationForTest(message_, move->GetDestination()); message_ << ")"; } @@ -76,7 +90,64 @@ class TestParallelMoveResolver : public ParallelMoveResolver { std::ostringstream message_; - DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolver); + DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolverWithSwap); +}; + +class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { + public: + explicit TestParallelMoveResolverNoSwap(ArenaAllocator* allocator) + : ParallelMoveResolverNoSwap(allocator), scratch_index_(kScratchRegisterStartIndexForTest) {} + + void PrepareForEmitNativeCode() OVERRIDE { + scratch_index_ = kScratchRegisterStartIndexForTest; + } + + void FinishEmitNativeCode() OVERRIDE {} + + Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE { + if (kind == Location::kStackSlot || kind == Location::kFpuRegister || + kind == Location::kRegister) { + kind = Location::kRegister; + } else { + // Allocate register pair for double stack slot which simulates 32-bit backend's behavior. + kind = Location::kRegisterPair; + } + Location scratch = GetScratchLocation(kind); + if (scratch.Equals(Location::NoLocation())) { + AddScratchLocation(Location::RegisterLocation(scratch_index_)); + AddScratchLocation(Location::RegisterLocation(scratch_index_ + 1)); + AddScratchLocation(Location::RegisterPairLocation(scratch_index_, scratch_index_ + 1)); + scratch = (kind == Location::kRegister) ? Location::RegisterLocation(scratch_index_) + : Location::RegisterPairLocation(scratch_index_, scratch_index_ + 1); + scratch_index_ += 2; + } + return scratch; + } + + void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {} + + void EmitMove(size_t index) OVERRIDE { + MoveOperands* move = moves_.Get(index); + if (!message_.str().empty()) { + message_ << " "; + } + message_ << "("; + DumpLocationForTest(message_, move->GetSource()); + message_ << " -> "; + DumpLocationForTest(message_, move->GetDestination()); + message_ << ")"; + } + + std::string GetMessage() const { + return message_.str(); + } + + private: + std::ostringstream message_; + + int scratch_index_; + + DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolverNoSwap); }; static HParallelMove* BuildParallelMove(ArenaAllocator* allocator, @@ -93,55 +164,102 @@ static HParallelMove* BuildParallelMove(ArenaAllocator* allocator, return moves; } -TEST(ParallelMoveTest, Dependency) { +template <typename T> +class ParallelMoveTest : public ::testing::Test { + public: + static const bool has_swap; +}; + +template<> const bool ParallelMoveTest<TestParallelMoveResolverWithSwap>::has_swap = true; +template<> const bool ParallelMoveTest<TestParallelMoveResolverNoSwap>::has_swap = false; + +typedef ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap> + ParallelMoveResolverTestTypes; + +TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes); + + +TYPED_TEST(ParallelMoveTest, Dependency) { ArenaPool pool; ArenaAllocator allocator(&pool); { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 2}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str()); + } } 
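// Note: the first case above contains no cycle, so the with-swap and no-swap
// resolvers emit the same sequence and the two expectations are identical.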
{ - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {1, 4}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(2 -> 3) (1 -> 2) (1 -> 4) (0 -> 1)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2 -> 3) (1 -> 2) (1 -> 4) (0 -> 1)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2 -> 3) (1 -> 2) (0 -> 1) (2 -> 4)", resolver.GetMessage().c_str()); + } } } -TEST(ParallelMoveTest, Swap) { +TYPED_TEST(ParallelMoveTest, Cycle) { ArenaPool pool; ArenaAllocator allocator(&pool); { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 0}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(1 <-> 0)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(1 <-> 0)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(1 -> T0) (0 -> 1) (T0 -> 0)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {1, 0}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(1 -> 2) (1 <-> 0)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(1 -> 2) (1 <-> 0)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(1 -> 2) (0 -> 1) (2 -> 0)", resolver.GetMessage().c_str()); + } + } + + { + TypeParam resolver(&allocator); + static constexpr size_t moves[][2] = {{0, 1}, {1, 0}, {0, 2}}; + resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0 -> 2) (1 <-> 0)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(0 -> 2) (1 -> 0) (2 -> 1)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 0}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(4 -> T0) (3 -> 4) (2 -> 3) (1 -> 2) (0 -> 1) (T0 -> 0)", + resolver.GetMessage().c_str()); + } } } -TEST(ParallelMoveTest, ConstantLast) { +TYPED_TEST(ParallelMoveTest, ConstantLast) { ArenaPool pool; ArenaAllocator allocator(&pool); - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::ConstantLocation(new (&allocator) HIntConstant(0)), @@ -157,12 +275,12 @@ TEST(ParallelMoveTest, ConstantLast) { ASSERT_STREQ("(1 -> 2) (C -> 0)", resolver.GetMessage().c_str()); } -TEST(ParallelMoveTest, Pairs) { +TYPED_TEST(ParallelMoveTest, Pairs) { ArenaPool pool; ArenaAllocator allocator(&pool); { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(2), @@ -179,7 +297,7 @@ TEST(ParallelMoveTest, Pairs) { } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); 
HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -196,7 +314,7 @@ TEST(ParallelMoveTest, Pairs) { } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -209,10 +327,14 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2 -> T0) (0,1 -> 2,3) (T0 -> 0)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(2), @@ -230,10 +352,15 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(0,1 -> T0,T1) (7 -> 1) (2 -> 7) (T0,T1 -> 2,3)", + resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(2), @@ -251,10 +378,15 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(0,1 -> T0,T1) (7 -> 1) (2 -> 7) (T0,T1 -> 2,3)", + resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -272,10 +404,14 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(7 -> T0) (2 -> 7) (0,1 -> 2,3) (T0 -> 1)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -288,10 +424,14 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2,3 -> T0,T1) (0,1 -> 2,3) (T0,T1 -> 0,1)", resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(2, 3), @@ -304,12 +444,85 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimLong, nullptr); 
resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(0,1 -> T0,T1) (2,3 -> 0,1) (T0,T1 -> 2,3)", resolver.GetMessage().c_str()); + } + } +} + +TYPED_TEST(ParallelMoveTest, MultiCycles) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + { + TypeParam resolver(&allocator); + static constexpr size_t moves[][2] = {{0, 1}, {1, 0}, {2, 3}, {3, 2}}; + resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); + if (TestFixture::has_swap) { + ASSERT_STREQ("(1 <-> 0) (3 <-> 2)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(1 -> T0) (0 -> 1) (T0 -> 0) (3 -> T0) (2 -> 3) (T0 -> 2)", + resolver.GetMessage().c_str()); + } + } + { + TypeParam resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, + nullptr); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(0), + Primitive::kPrimInt, + nullptr); + moves->AddMove( + Location::RegisterLocation(3), + Location::RegisterLocation(1), + Primitive::kPrimInt, + nullptr); + resolver.EmitNativeCode(moves); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2 -> T0) (3 -> T1) (0,1 -> 2,3) (T0 -> 0) (T1 -> 1)", + resolver.GetMessage().c_str()); + } + } + { + TypeParam resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(0), + Primitive::kPrimInt, + nullptr); + moves->AddMove( + Location::RegisterLocation(3), + Location::RegisterLocation(1), + Primitive::kPrimInt, + nullptr); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, + nullptr); + resolver.EmitNativeCode(moves); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(3 -> T0) (0,1 -> T2,T3) (T0 -> 1) (2 -> 0) (T2,T3 -> 2,3)", + resolver.GetMessage().c_str()); + } } { // Test involving registers used in single context and pair context. - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(10), @@ -327,17 +540,22 @@ TEST(ParallelMoveTest, Pairs) { Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2x32(sp) -> T0,T1) (4,5 -> 2x32(sp)) (10 -> 5) (T0,T1 -> 10,11)", + resolver.GetMessage().c_str()); + } } } // Test that we do 64bits moves before 32bits moves. 
-TEST(ParallelMoveTest, CyclesWith64BitsMoves) { +TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { ArenaPool pool; ArenaAllocator allocator(&pool); { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterLocation(0), @@ -355,11 +573,16 @@ TEST(ParallelMoveTest, CyclesWith64BitsMoves) { Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(0 <-> 1) (48(sp) <-> 0)", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(0 <-> 1) (48(sp) <-> 0)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(48(sp) -> T0) (1 -> 48(sp)) (0 -> 1) (T0 -> 0)", + resolver.GetMessage().c_str()); + } } { - TestParallelMoveResolver resolver(&allocator); + TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); moves->AddMove( Location::RegisterPairLocation(0, 1), @@ -377,7 +600,12 @@ TEST(ParallelMoveTest, CyclesWith64BitsMoves) { Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2x32(sp) <-> 0,1) (2,3 <-> 2x32(sp))", resolver.GetMessage().c_str()); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2x32(sp) <-> 0,1) (2,3 <-> 2x32(sp))", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2x32(sp) -> T0,T1) (2,3 -> 2x32(sp)) (0,1 -> 2,3) (T0,T1 -> 0,1)", + resolver.GetMessage().c_str()); + } } } diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 479b87fea0..12b1c2b9bd 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -58,36 +58,40 @@ void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { } void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) { - HInstruction* lastInstruction = block->GetLastInstruction(); - if (!lastInstruction->IsIf()) { + HIf* ifInstruction = block->GetLastInstruction()->AsIf(); + if (ifInstruction == nullptr) { return; } - HInstruction* ifInput = lastInstruction->InputAt(0); + HInstruction* ifInput = ifInstruction->InputAt(0); if (!ifInput->IsNotEqual() && !ifInput->IsEqual()) { return; } HInstruction* input0 = ifInput->InputAt(0); HInstruction* input1 = ifInput->InputAt(1); - HInstruction* obj; + HInstruction* obj = nullptr; - if ((input0->GetType() == Primitive::kPrimNot) && input1->ActAsNullConstant()) { + if (input1->IsNullConstant()) { obj = input0; - } else if ((input1->GetType() == Primitive::kPrimNot) && input0->ActAsNullConstant()) { + } else if (input0->IsNullConstant()) { obj = input1; } else { return; } - HBoundType* bound_type = - new (graph_->GetArena()) HBoundType(obj, ReferenceTypeInfo::CreateTop(false)); - - block->InsertInstructionBefore(bound_type, lastInstruction); + // We only need to bound the type if we have uses in the relevant block. + // So start with null and create the HBoundType lazily, only if it's needed. + HBoundType* bound_type = nullptr; HBasicBlock* notNullBlock = ifInput->IsNotEqual() - ? lastInstruction->AsIf()->IfTrueSuccessor() - : lastInstruction->AsIf()->IfFalseSuccessor(); + ? 
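Converting these tests from TEST to TYPED_TEST lets the same bodies run once per resolver type, with `TypeParam` naming the current resolver and `TestFixture::has_swap` selecting the expected trace. A minimal, self-contained illustration of that gtest mechanism follows; the resolver structs and the `has_swap` flag are toy stand-ins, and `TYPED_TEST_SUITE` is the current gtest spelling (older releases use `TYPED_TEST_CASE`).

// Minimal illustration (toy types, not ART's) of the gtest typed-test
// machinery used above. Build with -std=c++14 or later and link gtest_main.
#include <gtest/gtest.h>
#include <string>

struct SwapResolver {
  static constexpr bool kHasSwap = true;
  std::string Run() const { return "swap"; }
};
struct NoSwapResolver {
  static constexpr bool kHasSwap = false;
  std::string Run() const { return "scratch"; }
};

template <typename T>
class ResolverTest : public ::testing::Test {
 public:
  static constexpr bool has_swap = T::kHasSwap;
};

using ResolverTypes = ::testing::Types<SwapResolver, NoSwapResolver>;
TYPED_TEST_SUITE(ResolverTest, ResolverTypes);

// Instantiated once per type in ResolverTypes.
TYPED_TEST(ResolverTest, PicksExpectationPerResolver) {
  TypeParam resolver;
  if (TestFixture::has_swap) {
    EXPECT_EQ("swap", resolver.Run());
  } else {
    EXPECT_EQ("scratch", resolver.Run());
  }
}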
ifInstruction->IfTrueSuccessor() + : ifInstruction->IfFalseSuccessor(); + for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) { HInstruction* user = it.Current()->GetUser(); if (notNullBlock->Dominates(user->GetBlock())) { + if (bound_type == nullptr) { + bound_type = new (graph_->GetArena()) HBoundType(obj, ReferenceTypeInfo::CreateTop(false)); + notNullBlock->InsertInstructionBefore(bound_type, notNullBlock->GetFirstInstruction()); + } user->ReplaceInput(bound_type, it.Current()->GetIndex()); } } @@ -98,49 +102,58 @@ void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) { // If that's the case insert an HBoundType instruction to bound the type of `x` // to `ClassX` in the scope of the dominated blocks. void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { - HInstruction* lastInstruction = block->GetLastInstruction(); - if (!lastInstruction->IsIf()) { - return; - } - HInstruction* ifInput = lastInstruction->InputAt(0); - // TODO: Handle more patterns here: HIf(bool) HIf(HNotEqual). - if (!ifInput->IsEqual()) { + HIf* ifInstruction = block->GetLastInstruction()->AsIf(); + if (ifInstruction == nullptr) { return; } - HInstruction* instanceOf = ifInput->InputAt(0); - HInstruction* comp_value = ifInput->InputAt(1); - if (!instanceOf->IsInstanceOf() || !comp_value->IsIntConstant()) { + HInstruction* ifInput = ifInstruction->InputAt(0); + HInstruction* instanceOf = nullptr; + HBasicBlock* instanceOfTrueBlock = nullptr; + + // The instruction simplifier has transformed: + // - `if (a instanceof A)` into an HIf with an HInstanceOf input + // - `if (!(a instanceof A)` into an HIf with an HBooleanNot input (which in turn + // has an HInstanceOf input) + // So we should not see the usual HEqual here. + if (ifInput->IsInstanceOf()) { + instanceOf = ifInput; + instanceOfTrueBlock = ifInstruction->IfTrueSuccessor(); + } else if (ifInput->IsBooleanNot() && ifInput->InputAt(0)->IsInstanceOf()) { + instanceOf = ifInput->InputAt(0); + instanceOfTrueBlock = ifInstruction->IfFalseSuccessor(); + } else { return; } - HInstruction* obj = instanceOf->InputAt(0); - HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); - - ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); - HBoundType* bound_type = new (graph_->GetArena()) HBoundType(obj, class_rti); - - // Narrow the type as much as possible. - { - ScopedObjectAccess soa(Thread::Current()); - if (!load_class->IsResolved() || class_rti.IsSupertypeOf(obj_rti)) { - bound_type->SetReferenceTypeInfo(obj_rti); - } else { - bound_type->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); - } - } - - block->InsertInstructionBefore(bound_type, lastInstruction); - // Pick the right successor based on the value we compare against. - HIntConstant* comp_value_int = comp_value->AsIntConstant(); - HBasicBlock* instanceOfTrueBlock = comp_value_int->GetValue() == 0 - ? lastInstruction->AsIf()->IfFalseSuccessor() - : lastInstruction->AsIf()->IfTrueSuccessor(); + // We only need to bound the type if we have uses in the relevant block. + // So start with null and create the HBoundType lazily, only if it's needed. 
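Both rewrites above now create the narrowing `HBoundType` lazily: the instruction is allocated only once a use dominated by the guarded successor is actually found, and it is inserted at the start of that successor. A simplified sketch of the pattern follows; `Node`, `Use`, `Block`, and the toy dominance test are stand-ins for the HIR classes, and allocation/ownership details (arena allocation in ART) are ignored for brevity.

// Sketch of the "create the replacement lazily" pattern above.
#include <cstddef>
#include <vector>

struct Block {
  int id;
  bool Dominates(const Block& other) const { return id <= other.id; }  // toy dominance
};
struct Node;
struct Use { Node* user; size_t input_index; };
struct Node {
  Block* block;
  std::vector<Node*> inputs;
  std::vector<Use> uses;
  void ReplaceInput(Node* replacement, size_t index) { inputs[index] = replacement; }
};

// Rewrites uses of `obj` dominated by `guarded_block` to read a narrowed node
// instead. The narrowed node (HBoundType in ART) is only allocated once the
// first such use is found, so unaffected code pays nothing.
Node* NarrowDominatedUses(Node* obj, Block* guarded_block) {
  Node* bound = nullptr;
  for (const Use& use : obj->uses) {
    if (guarded_block->Dominates(*use.user->block)) {
      if (bound == nullptr) {
        bound = new Node{guarded_block, {obj}, {}};
      }
      use.user->ReplaceInput(bound, use.input_index);
    }
  }
  return bound;  // nullptr when no use needed narrowing
}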
+ HBoundType* bound_type = nullptr; + HInstruction* obj = instanceOf->InputAt(0); for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) { HInstruction* user = it.Current()->GetUser(); if (instanceOfTrueBlock->Dominates(user->GetBlock())) { + if (bound_type == nullptr) { + HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); + + ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + bound_type = new (graph_->GetArena()) HBoundType(obj, class_rti); + + // Narrow the type as much as possible. + { + ScopedObjectAccess soa(Thread::Current()); + if (!load_class->IsResolved() || class_rti.IsSupertypeOf(obj_rti)) { + bound_type->SetReferenceTypeInfo(obj_rti); + } else { + bound_type->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); + } + } + + instanceOfTrueBlock->InsertInstructionBefore( + bound_type, instanceOfTrueBlock->GetFirstInstruction()); + } user->ReplaceInput(bound_type, it.Current()->GetIndex()); } } diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 6350b35ca1..0fdf051957 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -378,7 +378,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { // Split just before first register use. size_t first_register_use = current->FirstRegisterUse(); if (first_register_use != kNoLifetime) { - LiveInterval* split = Split(current, first_register_use - 1); + LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1); // Don't add directly to `unhandled`, it needs to be sorted and the start // of this new interval might be after intervals already in the list. AddSorted(&unhandled, split); @@ -903,6 +903,10 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { return false; } + // We use the first use to compare with other intervals. If this interval + // is used after any active intervals, we will spill this interval. + size_t first_use = current->FirstUseAfter(current->GetStart()); + // First set all registers as not being used. size_t* next_use = registers_array_; for (size_t i = 0; i < number_of_registers_; ++i) { @@ -917,7 +921,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { if (active->IsFixed()) { next_use[active->GetRegister()] = current->GetStart(); } else { - size_t use = active->FirstRegisterUseAfter(current->GetStart()); + size_t use = active->FirstUseAfter(current->GetStart()); if (use != kNoLifetime) { next_use[active->GetRegister()] = use; } @@ -945,7 +949,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { next_use[inactive->GetRegister()] = std::min(next_intersection, next_use[inactive->GetRegister()]); } else { - size_t use = inactive->FirstRegisterUseAfter(current->GetStart()); + size_t use = inactive->FirstUseAfter(current->GetStart()); if (use != kNoLifetime) { next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]); } @@ -959,16 +963,16 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { DCHECK(current->IsHighInterval()); reg = current->GetRegister(); // When allocating the low part, we made sure the high register was available. 
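The register-allocator change above switches the spill decision from `FirstRegisterUseAfter` to `FirstUseAfter`: the incoming interval's first use of any kind is compared against the next use of every register holder, and the incoming interval is spilled when even the furthest-away holder is needed sooner. A compact sketch of that comparison, with plain arrays in place of `LiveInterval` objects and illustrative names:

// next_use[r] is the next position at which the interval currently holding
// register r is used; first_use is the first use of the interval being
// allocated.
#include <cstddef>
#include <cstdint>

constexpr size_t kNumRegs = 4;

struct BlockedRegDecision {
  bool spill_current;  // true: split the incoming interval before its first use
  size_t reg;          // otherwise: evict the current holder of this register
};

BlockedRegDecision Decide(uint32_t first_use, const uint32_t (&next_use)[kNumRegs]) {
  // Pick the register whose current holder is needed furthest in the future.
  size_t best = 0;
  for (size_t r = 1; r < kNumRegs; ++r) {
    if (next_use[r] > next_use[best]) best = r;
  }
  // If even that holder is needed before our first use, evicting it would be
  // worse than spilling ourselves, so spill the incoming interval instead.
  return {first_use >= next_use[best], best};
}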
- DCHECK_LT(first_register_use, next_use[reg]); + DCHECK_LT(first_use, next_use[reg]); } else if (current->IsLowInterval()) { reg = FindAvailableRegisterPair(next_use, first_register_use); // We should spill if both registers are not available. - should_spill = (first_register_use >= next_use[reg]) - || (first_register_use >= next_use[GetHighForLowRegister(reg)]); + should_spill = (first_use >= next_use[reg]) + || (first_use >= next_use[GetHighForLowRegister(reg)]); } else { DCHECK(!current->IsHighInterval()); reg = FindAvailableRegister(next_use); - should_spill = (first_register_use >= next_use[reg]); + should_spill = (first_use >= next_use[reg]); } DCHECK_NE(reg, kNoRegister); @@ -993,15 +997,17 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // If the first use of that instruction is after the last use of the found // register, we split this interval just before its first register use. AllocateSpillSlotFor(current); - LiveInterval* split = Split(current, first_register_use - 1); + LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1); if (current == split) { DumpInterval(std::cerr, current); DumpAllIntervals(std::cerr); // This situation has the potential to infinite loop, so we make it a non-debug CHECK. + HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2); CHECK(false) << "There is not enough registers available for " << split->GetParent()->GetDefinedBy()->DebugName() << " " << split->GetParent()->GetDefinedBy()->GetId() - << " at " << first_register_use - 1; + << " at " << first_register_use - 1 << " " + << (at == nullptr ? "" : at->DebugName()); } AddSorted(unhandled_, split); } @@ -1094,6 +1100,31 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter } } +LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t from, size_t to) { + HBasicBlock* block_from = liveness_.GetBlockFromPosition(from); + HBasicBlock* block_to = liveness_.GetBlockFromPosition(to); + DCHECK(block_from != nullptr); + DCHECK(block_to != nullptr); + + // Both locations are in the same block. We split at the given location. + if (block_from == block_to) { + return Split(interval, to); + } + + // If `to` is in a loop, find the outermost loop header which does not contain `from`. + for (HLoopInformationOutwardIterator it(*block_to); !it.Done(); it.Advance()) { + HBasicBlock* header = it.Current()->GetHeader(); + if (block_from->GetLifetimeStart() >= header->GetLifetimeStart()) { + break; + } + block_to = header; + } + + // Split at the start of the found block, to piggy back on existing moves + // due to resolution if non-linear control flow (see `ConnectSplitSiblings`). + return Split(interval, block_to->GetLifetimeStart()); +} + LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) { DCHECK_GE(position, interval->GetStart()); DCHECK(!interval->IsDeadAt(position)); diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 717be75533..dc9c708eea 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -86,8 +86,12 @@ class RegisterAllocator { // Add `interval` in the given sorted list. static void AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval); - // Split `interval` at the position `at`. The new interval starts at `at`. - LiveInterval* Split(LiveInterval* interval, size_t at); + // Split `interval` at the position `position`. 
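`SplitBetween` picks a split position rather than blindly splitting just before the first register use: if `from` and `to` land in different blocks and `to` sits inside loops, the split is hoisted to the start of the outermost loop header that still lies after `from`, so the reload piggybacks on moves already emitted at loop boundaries. A simplified sketch, with toy structs in place of `HBasicBlock`/`HLoopInformation` and loop containment approximated by lifetime order as in the real code:

#include <cstddef>
#include <vector>

struct Loop { size_t header_lifetime_start; };

struct BlockInfo {
  size_t lifetime_start;
  // Loops containing the block, innermost first (outward iteration order).
  std::vector<Loop> enclosing_loops;
};

size_t ChooseSplitPosition(const BlockInfo& block_from,
                           const BlockInfo& block_to,
                           size_t to) {
  if (block_from.lifetime_start == block_to.lifetime_start) {
    return to;  // same block: split exactly where requested
  }
  // Hoist the split out of any loop around `to` that does not also cover
  // `from`, landing on a block boundary where resolution moves already exist.
  size_t split = block_to.lifetime_start;
  for (const Loop& loop : block_to.enclosing_loops) {
    if (block_from.lifetime_start >= loop.header_lifetime_start) {
      break;  // this loop starts at or before `from`; stop hoisting
    }
    split = loop.header_lifetime_start;
  }
  return split;
}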
The new interval starts at `position`. + LiveInterval* Split(LiveInterval* interval, size_t position); + + // Split `interval` at a position between `from` and `to`. The method will try + // to find an optimal split position. + LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to); // Returns whether `reg` is blocked by the code generator. bool IsBlocked(int reg) const; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 182cd0e833..8c6d904a4c 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -854,6 +854,10 @@ TEST(RegisterAllocatorTest, SpillInactive) { X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(graph, &codegen); + // Populate the instructions in the liveness object, to please the register allocator. + for (size_t i = 0; i < 32; ++i) { + liveness.instructions_from_lifetime_position_.Add(user); + } RegisterAllocator register_allocator(&allocator, &codegen, liveness); register_allocator.unhandled_core_intervals_.Add(fourth); diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 8eb98a186b..97254edb5e 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -131,6 +131,9 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { void Dump(std::ostream& stream) const { stream << position_; + if (is_environment_) { + stream << " (env)"; + } } UsePosition* Dup(ArenaAllocator* allocator) const { @@ -330,7 +333,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { } if (after_loop == nullptr) { // Uses are only in the loop. - first_range_ = last_range_ = range_search_start_ = new (allocator_) LiveRange(start, end, nullptr); + first_range_ = last_range_ = range_search_start_ = + new (allocator_) LiveRange(start, end, nullptr); } else if (after_loop->GetStart() <= end) { first_range_ = range_search_start_ = after_loop; // There are uses after the loop. @@ -366,6 +370,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { LiveInterval* GetParent() const { return parent_; } + // Returns whether this interval is the parent interval, that is, the interval + // that starts where the HInstruction is defined. + bool IsParent() const { return parent_ == this; } + LiveRange* GetFirstRange() const { return first_range_; } LiveRange* GetLastRange() const { return last_range_; } @@ -442,7 +450,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { if (is_temp_) { return position == GetStart() ? position : kNoLifetime; } - if (position == GetStart() && defined_by_ != nullptr) { + if (position == GetStart() && IsParent()) { LocationSummary* locations = defined_by_->GetLocations(); Location location = locations->Out(); // This interval is the first interval of the instruction. If the output @@ -491,12 +499,19 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return position == GetStart() ? 
position : kNoLifetime; } + if (position == GetStart() && IsParent()) { + if (defined_by_->GetLocations()->Out().IsValid()) { + return position; + } + } + UsePosition* use = first_use_; size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { if (!use->GetIsEnvironment()) { + Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex()); size_t use_position = use->GetPosition(); - if (use_position > position) { + if (use_position > position && location.IsValid()) { return use_position; } } @@ -582,7 +597,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { previous->next_ = nullptr; new_interval->first_range_ = current; if (range_search_start_ != nullptr && range_search_start_->GetEnd() >= current->GetEnd()) { - // Search start point is inside `new_interval`. Change it to nullptr + // Search start point is inside `new_interval`. Change it to null // (i.e. the end of the interval) in the original interval. range_search_start_ = nullptr; } @@ -725,7 +740,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { } void AddHighInterval(bool is_temp = false) { - DCHECK_EQ(GetParent(), this); + DCHECK(IsParent()); DCHECK(!HasHighInterval()); DCHECK(!HasLowInterval()); high_or_low_interval_ = new (allocator_) LiveInterval( @@ -849,7 +864,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { defined_by_(defined_by) {} // Searches for a LiveRange that either covers the given position or is the - // first next LiveRange. Returns nullptr if no such LiveRange exists. Ranges + // first next LiveRange. Returns null if no such LiveRange exists. Ranges // known to end before `position` can be skipped with `search_start`. LiveRange* FindRangeAtOrAfter(size_t position, LiveRange* search_start) const { if (kIsDebugBuild) { @@ -983,6 +998,15 @@ class SsaLivenessAnalysis : public ValueObject { return instructions_from_lifetime_position_.Get(index); } + HBasicBlock* GetBlockFromPosition(size_t index) const { + HInstruction* instruction = GetInstructionFromPosition(index / 2); + if (instruction == nullptr) { + // If we are at a block boundary, get the block following. + instruction = GetInstructionFromPosition((index / 2) + 1); + } + return instruction->GetBlock(); + } + HInstruction* GetTempUser(LiveInterval* temp) const { // A temporary shares the same lifetime start as the instruction that requires it. DCHECK(temp->IsTemp()); @@ -1053,6 +1077,8 @@ class SsaLivenessAnalysis : public ValueObject { GrowableArray<HInstruction*> instructions_from_lifetime_position_; size_t number_of_ssa_values_; + ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive); + DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis); }; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc new file mode 100644 index 0000000000..8344fc3237 --- /dev/null +++ b/compiler/optimizing/stack_map_stream.cc @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
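`GetBlockFromPosition` relies on the convention that each instruction occupies two lifetime positions and that block-boundary positions map to no instruction, in which case the lookup falls forward one slot. A stand-alone version of that lookup, with simplified types and bounds checks omitted (the caller is assumed to pass a valid position, as in the register allocator):

#include <cstddef>
#include <vector>

struct Block { int id; };
struct Instruction { Block* block; };

Block* BlockFromPosition(const std::vector<Instruction*>& instructions_by_position,
                         size_t position) {
  size_t index = position / 2;  // two lifetime positions per instruction
  Instruction* instruction = instructions_by_position[index];
  if (instruction == nullptr) {
    // Block boundary: the next slot holds the first instruction of the
    // following block.
    instruction = instructions_by_position[index + 1];
  }
  return instruction->block;
}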
+ */ + +#include "stack_map_stream.h" + +namespace art { + +void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, + uint32_t native_pc_offset, + uint32_t register_mask, + BitVector* sp_mask, + uint32_t num_dex_registers, + uint8_t inlining_depth) { + DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry"; + current_entry_.dex_pc = dex_pc; + current_entry_.native_pc_offset = native_pc_offset; + current_entry_.register_mask = register_mask; + current_entry_.sp_mask = sp_mask; + current_entry_.num_dex_registers = num_dex_registers; + current_entry_.inlining_depth = inlining_depth; + current_entry_.dex_register_locations_start_index = dex_register_locations_.Size(); + current_entry_.inline_infos_start_index = inline_infos_.Size(); + current_entry_.dex_register_map_hash = 0; + current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound; + if (num_dex_registers != 0) { + current_entry_.live_dex_registers_mask = + new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true); + } else { + current_entry_.live_dex_registers_mask = nullptr; + } + + if (sp_mask != nullptr) { + stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet()); + } + if (inlining_depth > 0) { + number_of_stack_maps_with_inline_info_++; + } + + dex_pc_max_ = std::max(dex_pc_max_, dex_pc); + native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset); + register_mask_max_ = std::max(register_mask_max_, register_mask); +} + +void StackMapStream::EndStackMapEntry() { + current_entry_.same_dex_register_map_as_ = FindEntryWithTheSameDexMap(); + stack_maps_.Add(current_entry_); + current_entry_ = StackMapEntry(); +} + +void StackMapStream::AddDexRegisterEntry(uint16_t dex_register, + DexRegisterLocation::Kind kind, + int32_t value) { + DCHECK_LT(dex_register, current_entry_.num_dex_registers); + + if (kind != DexRegisterLocation::Kind::kNone) { + // Ensure we only use non-compressed location kind at this stage. + DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) + << DexRegisterLocation::PrettyDescriptor(kind); + DexRegisterLocation location(kind, value); + + // Look for Dex register `location` in the location catalog (using the + // companion hash map of locations to indices). Use its index if it + // is already in the location catalog. If not, insert it (in the + // location catalog and the hash map) and use the newly created index. + auto it = location_catalog_entries_indices_.Find(location); + if (it != location_catalog_entries_indices_.end()) { + // Retrieve the index from the hash map. + dex_register_locations_.Add(it->second); + } else { + // Create a new entry in the location catalog and the hash map. + size_t index = location_catalog_entries_.Size(); + location_catalog_entries_.Add(location); + dex_register_locations_.Add(index); + location_catalog_entries_indices_.Insert(std::make_pair(location, index)); + } + + current_entry_.live_dex_registers_mask->SetBit(dex_register); + current_entry_.dex_register_map_hash += + (1 << (dex_register % (sizeof(current_entry_.dex_register_map_hash) * kBitsPerByte))); + current_entry_.dex_register_map_hash += static_cast<uint32_t>(value); + current_entry_.dex_register_map_hash += static_cast<uint32_t>(kind); + } +} + +void StackMapStream::AddInlineInfoEntry(uint32_t method_index) { + InlineInfoEntry entry; + entry.method_index = method_index; + inline_infos_.Add(entry); +} + +size_t StackMapStream::PrepareForFillIn() { + int stack_mask_number_of_bits = stack_mask_max_ + 1; // Need room for max element too. 
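`AddDexRegisterEntry` interns each `(kind, value)` location into a catalog and only streams catalog indices, using a hash map from location to index to avoid duplicates. The sketch below shows that interning pattern with ordinary standard containers; `Location`, `LocationCatalog`, and the hash functor are illustrative stand-ins for `DexRegisterLocation` and the arena containers.

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

struct Location {
  int kind;
  int32_t value;
  bool operator==(const Location& other) const {
    return kind == other.kind && value == other.value;
  }
};

struct LocationHash {
  size_t operator()(const Location& l) const {
    return std::hash<int64_t>()((static_cast<int64_t>(l.kind) << 32) ^
                                static_cast<uint32_t>(l.value));
  }
};

class LocationCatalog {
 public:
  // Returns the catalog index of `loc`, adding it on first sight.
  size_t Intern(const Location& loc) {
    auto it = indices_.find(loc);
    if (it != indices_.end()) {
      return it->second;  // already in the catalog: reuse its index
    }
    size_t index = entries_.size();
    entries_.push_back(loc);
    indices_.emplace(loc, index);
    return index;
  }
  const std::vector<Location>& entries() const { return entries_; }

 private:
  std::vector<Location> entries_;
  std::unordered_map<Location, size_t, LocationHash> indices_;
};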
+ stack_mask_size_ = RoundUp(stack_mask_number_of_bits, kBitsPerByte) / kBitsPerByte; + inline_info_size_ = ComputeInlineInfoSize(); + dex_register_maps_size_ = ComputeDexRegisterMapsSize(); + stack_maps_size_ = stack_maps_.Size() + * StackMap::ComputeStackMapSize(stack_mask_size_, + inline_info_size_, + dex_register_maps_size_, + dex_pc_max_, + native_pc_offset_max_, + register_mask_max_); + dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize(); + + // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned. + needed_size_ = CodeInfo::kFixedSize + + dex_register_location_catalog_size_ + + stack_maps_size_ + + dex_register_maps_size_ + + inline_info_size_; + + dex_register_location_catalog_start_ = CodeInfo::kFixedSize; + stack_maps_start_ = dex_register_location_catalog_start_ + dex_register_location_catalog_size_; + dex_register_maps_start_ = stack_maps_start_ + stack_maps_size_; + inline_infos_start_ = dex_register_maps_start_ + dex_register_maps_size_; + + return needed_size_; +} + +size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const { + size_t size = DexRegisterLocationCatalog::kFixedSize; + for (size_t location_catalog_entry_index = 0; + location_catalog_entry_index < location_catalog_entries_.Size(); + ++location_catalog_entry_index) { + DexRegisterLocation dex_register_location = + location_catalog_entries_.Get(location_catalog_entry_index); + size += DexRegisterLocationCatalog::EntrySize(dex_register_location); + } + return size; +} + +size_t StackMapStream::ComputeDexRegisterMapSize(const StackMapEntry& entry) const { + // Size of the map in bytes. + size_t size = DexRegisterMap::kFixedSize; + // Add the live bit mask for the Dex register liveness. + size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers); + // Compute the size of the set of live Dex register entries. + size_t number_of_live_dex_registers = 0; + for (size_t dex_register_number = 0; + dex_register_number < entry.num_dex_registers; + ++dex_register_number) { + if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { + ++number_of_live_dex_registers; + } + } + size_t map_entries_size_in_bits = + DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size()) + * number_of_live_dex_registers; + size_t map_entries_size_in_bytes = + RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; + size += map_entries_size_in_bytes; + return size; +} + +size_t StackMapStream::ComputeDexRegisterMapsSize() const { + size_t size = 0; + for (size_t i = 0; i < stack_maps_.Size(); ++i) { + StackMapEntry entry = stack_maps_.Get(i); + if (entry.same_dex_register_map_as_ == kNoSameDexMapFound) { + // Entries with the same dex map will have the same offset. + size += ComputeDexRegisterMapSize(entry); + } + } + return size; +} + +size_t StackMapStream::ComputeInlineInfoSize() const { + return inline_infos_.Size() * InlineInfo::SingleEntrySize() + // For encoding the depth. 
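`PrepareForFillIn` replaces the old recompute-on-every-call size queries with a single pass that fixes each section's size, derives the section start offsets by accumulation, and returns the total so the caller can size the region handed to `FillIn`. The two-phase idea, reduced to a toy layout struct with illustrative section names:

#include <cstddef>

struct CodeInfoLayout {
  // Section sizes, filled in by the sizing pass.
  size_t header_size = 0;
  size_t catalog_size = 0;
  size_t stack_maps_size = 0;
  size_t dex_register_maps_size = 0;
  size_t inline_info_size = 0;

  // Derived section start offsets and total size.
  size_t catalog_start = 0;
  size_t stack_maps_start = 0;
  size_t dex_register_maps_start = 0;
  size_t inline_infos_start = 0;
  size_t total_size = 0;

  // Derives the start offsets from the already-computed sizes and returns the
  // number of bytes the caller must allocate before the fill-in phase.
  size_t Finalize() {
    catalog_start = header_size;
    stack_maps_start = catalog_start + catalog_size;
    dex_register_maps_start = stack_maps_start + stack_maps_size;
    inline_infos_start = dex_register_maps_start + dex_register_maps_size;
    total_size = inline_infos_start + inline_info_size;
    return total_size;
  }
};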
+ + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); +} + +void StackMapStream::FillIn(MemoryRegion region) { + DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry"; + DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn"; + + CodeInfo code_info(region); + DCHECK_EQ(region.size(), needed_size_); + code_info.SetOverallSize(region.size()); + + MemoryRegion dex_register_locations_region = region.Subregion( + dex_register_maps_start_, dex_register_maps_size_); + + MemoryRegion inline_infos_region = region.Subregion( + inline_infos_start_, inline_info_size_); + + code_info.SetEncoding(inline_info_size_, + dex_register_maps_size_, + dex_pc_max_, + native_pc_offset_max_, + register_mask_max_); + code_info.SetNumberOfStackMaps(stack_maps_.Size()); + code_info.SetStackMaskSize(stack_mask_size_); + DCHECK_EQ(code_info.GetStackMapsSize(), stack_maps_size_); + + // Set the Dex register location catalog. + code_info.SetNumberOfDexRegisterLocationCatalogEntries(location_catalog_entries_.Size()); + MemoryRegion dex_register_location_catalog_region = region.Subregion( + dex_register_location_catalog_start_, dex_register_location_catalog_size_); + DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); + // Offset in `dex_register_location_catalog` where to store the next + // register location. + size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; + for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) { + DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i); + dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); + location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); + } + // Ensure we reached the end of the Dex registers location_catalog. + DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size()); + + uintptr_t next_dex_register_map_offset = 0; + uintptr_t next_inline_info_offset = 0; + for (size_t i = 0, e = stack_maps_.Size(); i < e; ++i) { + StackMap stack_map = code_info.GetStackMapAt(i); + StackMapEntry entry = stack_maps_.Get(i); + + stack_map.SetDexPc(code_info, entry.dex_pc); + stack_map.SetNativePcOffset(code_info, entry.native_pc_offset); + stack_map.SetRegisterMask(code_info, entry.register_mask); + if (entry.sp_mask != nullptr) { + stack_map.SetStackMask(code_info, *entry.sp_mask); + } + + if (entry.num_dex_registers == 0) { + // No dex map available. + stack_map.SetDexRegisterMapOffset(code_info, StackMap::kNoDexRegisterMap); + } else { + // Search for an entry with the same dex map. + if (entry.same_dex_register_map_as_ != kNoSameDexMapFound) { + // If we have a hit reuse the offset. + stack_map.SetDexRegisterMapOffset(code_info, + code_info.GetStackMapAt(entry.same_dex_register_map_as_) + .GetDexRegisterMapOffset(code_info)); + } else { + // New dex registers maps should be added to the stack map. + MemoryRegion register_region = + dex_register_locations_region.Subregion( + next_dex_register_map_offset, + ComputeDexRegisterMapSize(entry)); + next_dex_register_map_offset += register_region.size(); + DexRegisterMap dex_register_map(register_region); + stack_map.SetDexRegisterMapOffset( + code_info, register_region.start() - dex_register_locations_region.start()); + + // Set the live bit mask. 
+ dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask); + + // Set the dex register location mapping data. + for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; + dex_register_number < entry.num_dex_registers; + ++dex_register_number) { + if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { + size_t location_catalog_entry_index = + dex_register_locations_.Get(entry.dex_register_locations_start_index + + index_in_dex_register_locations); + dex_register_map.SetLocationCatalogEntryIndex( + index_in_dex_register_locations, + location_catalog_entry_index, + entry.num_dex_registers, + location_catalog_entries_.Size()); + ++index_in_dex_register_locations; + } + } + } + } + + // Set the inlining info. + if (entry.inlining_depth != 0) { + MemoryRegion inline_region = inline_infos_region.Subregion( + next_inline_info_offset, + InlineInfo::kFixedSize + entry.inlining_depth * InlineInfo::SingleEntrySize()); + next_inline_info_offset += inline_region.size(); + InlineInfo inline_info(inline_region); + + // Currently relative to the dex register map. + stack_map.SetInlineDescriptorOffset( + code_info, inline_region.start() - dex_register_locations_region.start()); + + inline_info.SetDepth(entry.inlining_depth); + for (size_t j = 0; j < entry.inlining_depth; ++j) { + InlineInfoEntry inline_entry = inline_infos_.Get(j + entry.inline_infos_start_index); + inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index); + } + } else { + if (inline_info_size_ != 0) { + stack_map.SetInlineDescriptorOffset(code_info, StackMap::kNoInlineInfo); + } + } + } +} + +size_t StackMapStream::FindEntryWithTheSameDexMap() { + size_t current_entry_index = stack_maps_.Size(); + auto entries_it = dex_map_hash_to_stack_map_indices_.find(current_entry_.dex_register_map_hash); + if (entries_it == dex_map_hash_to_stack_map_indices_.end()) { + // We don't have a perfect hash functions so we need a list to collect all stack maps + // which might have the same dex register map. + GrowableArray<uint32_t> stack_map_indices(allocator_, 1); + stack_map_indices.Add(current_entry_index); + dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, stack_map_indices); + return kNoSameDexMapFound; + } + + // We might have collisions, so we need to check whether or not we really have a match. 
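The Dex register map written above stores a liveness bit per register plus one packed catalog index per live register; dead registers consume no entry, and a reader recovers an index by ranking the register among the live ones. A toy encoder/decoder for that scheme, with standard containers in place of `BitVector`/`MemoryRegion`:

#include <cstddef>
#include <optional>
#include <vector>

struct PackedMap {
  std::vector<bool> live;       // one bit per dex register
  std::vector<size_t> entries;  // catalog index for each live register, in order
};

PackedMap Encode(const std::vector<std::optional<size_t>>& catalog_index_per_register) {
  PackedMap map;
  map.live.resize(catalog_index_per_register.size(), false);
  for (size_t reg = 0; reg < catalog_index_per_register.size(); ++reg) {
    if (catalog_index_per_register[reg].has_value()) {
      map.live[reg] = true;
      map.entries.push_back(*catalog_index_per_register[reg]);
    }
  }
  return map;
}

// Returns the catalog index of `reg`, or nullopt when the register is dead.
std::optional<size_t> Decode(const PackedMap& map, size_t reg) {
  if (!map.live[reg]) {
    return std::nullopt;
  }
  size_t packed_index = 0;
  for (size_t i = 0; i < reg; ++i) {
    if (map.live[i]) ++packed_index;  // rank of `reg` among live registers
  }
  return map.entries[packed_index];
}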
+ for (size_t i = 0; i < entries_it->second.Size(); i++) { + size_t test_entry_index = entries_it->second.Get(i); + if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), current_entry_)) { + return test_entry_index; + } + } + entries_it->second.Add(current_entry_index); + return kNoSameDexMapFound; +} + +bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const { + if (a.live_dex_registers_mask == nullptr && b.live_dex_registers_mask == nullptr) { + return true; + } + if (a.live_dex_registers_mask == nullptr || b.live_dex_registers_mask == nullptr) { + return false; + } + if (a.num_dex_registers != b.num_dex_registers) { + return false; + } + + int index_in_dex_register_locations = 0; + for (uint32_t i = 0; i < a.num_dex_registers; i++) { + if (a.live_dex_registers_mask->IsBitSet(i) != b.live_dex_registers_mask->IsBitSet(i)) { + return false; + } + if (a.live_dex_registers_mask->IsBitSet(i)) { + size_t a_loc = dex_register_locations_.Get( + a.dex_register_locations_start_index + index_in_dex_register_locations); + size_t b_loc = dex_register_locations_.Get( + b.dex_register_locations_start_index + index_in_dex_register_locations); + if (a_loc != b_loc) { + return false; + } + ++index_in_dex_register_locations; + } + } + return true; +} + +} // namespace art diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 9a9e068a9b..0c626be89f 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -70,13 +70,18 @@ class StackMapStream : public ValueObject { native_pc_offset_max_(0), register_mask_max_(0), number_of_stack_maps_with_inline_info_(0), - dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()) {} - - // Compute bytes needed to encode a mask with the given maximum element. - static uint32_t StackMaskEncodingSize(int max_element) { - int number_of_bits = max_element + 1; // Need room for max element too. - return RoundUp(number_of_bits, kBitsPerByte) / kBitsPerByte; - } + dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()), + current_entry_(), + stack_mask_size_(0), + inline_info_size_(0), + dex_register_maps_size_(0), + stack_maps_size_(0), + dex_register_location_catalog_size_(0), + dex_register_location_catalog_start_(0), + stack_maps_start_(0), + dex_register_maps_start_(0), + inline_infos_start_(0), + needed_size_(0) {} // See runtime/stack_map.h to know what these fields contain. 
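`FindEntryWithTheSameDexMap` shares encodings with a hash-then-verify scheme: a non-perfect hash buckets candidate stack maps, and a full comparison (`HaveTheSameDexMaps`) weeds out collisions before an offset is reused. A standalone sketch of that scheme with illustrative names (`Entry`, `Deduplicator`, `kNotFound`):

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

constexpr size_t kNotFound = static_cast<size_t>(-1);

struct Entry {
  uint32_t hash;             // cheap, collision-prone fingerprint
  std::vector<int> payload;  // stands in for the per-entry register locations
};

class Deduplicator {
 public:
  // Returns the index of an earlier identical entry, or kNotFound; in the
  // latter case `index` is recorded as a sharing candidate for later entries.
  size_t FindOrRegister(const std::vector<Entry>& entries, size_t index) {
    const Entry& entry = entries[index];
    std::vector<size_t>& bucket = buckets_[entry.hash];
    for (size_t candidate : bucket) {
      if (entries[candidate].payload == entry.payload) {
        return candidate;  // verified match: reuse its encoding offset
      }
    }
    bucket.push_back(index);
    return kNotFound;
  }

 private:
  std::unordered_map<uint32_t, std::vector<size_t>> buckets_;
};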
struct StackMapEntry { @@ -90,380 +95,42 @@ class StackMapStream : public ValueObject { size_t inline_infos_start_index; BitVector* live_dex_registers_mask; uint32_t dex_register_map_hash; + size_t same_dex_register_map_as_; }; struct InlineInfoEntry { uint32_t method_index; }; - void AddStackMapEntry(uint32_t dex_pc, - uint32_t native_pc_offset, - uint32_t register_mask, - BitVector* sp_mask, - uint32_t num_dex_registers, - uint8_t inlining_depth) { - StackMapEntry entry; - entry.dex_pc = dex_pc; - entry.native_pc_offset = native_pc_offset; - entry.register_mask = register_mask; - entry.sp_mask = sp_mask; - entry.num_dex_registers = num_dex_registers; - entry.inlining_depth = inlining_depth; - entry.dex_register_locations_start_index = dex_register_locations_.Size(); - entry.inline_infos_start_index = inline_infos_.Size(); - entry.dex_register_map_hash = 0; - if (num_dex_registers != 0) { - entry.live_dex_registers_mask = - new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true); - } else { - entry.live_dex_registers_mask = nullptr; - } - stack_maps_.Add(entry); - - if (sp_mask != nullptr) { - stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet()); - } - if (inlining_depth > 0) { - number_of_stack_maps_with_inline_info_++; - } - - dex_pc_max_ = std::max(dex_pc_max_, dex_pc); - native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset); - register_mask_max_ = std::max(register_mask_max_, register_mask); - } - - void AddInlineInfoEntry(uint32_t method_index) { - InlineInfoEntry entry; - entry.method_index = method_index; - inline_infos_.Add(entry); - } - - size_t ComputeNeededSize() { - size_t size = CodeInfo::kFixedSize - + ComputeDexRegisterLocationCatalogSize() - + ComputeStackMapsSize() - + ComputeDexRegisterMapsSize() - + ComputeInlineInfoSize(); - // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned. - return size; - } - - size_t ComputeStackMaskSize() const { - return StackMaskEncodingSize(stack_mask_max_); - } - - size_t ComputeStackMapsSize() { - return stack_maps_.Size() * StackMap::ComputeStackMapSize( - ComputeStackMaskSize(), - ComputeInlineInfoSize(), - ComputeDexRegisterMapsSize(), - dex_pc_max_, - native_pc_offset_max_, - register_mask_max_); - } - - // Compute the size of the Dex register location catalog of `entry`. - size_t ComputeDexRegisterLocationCatalogSize() const { - size_t size = DexRegisterLocationCatalog::kFixedSize; - for (size_t location_catalog_entry_index = 0; - location_catalog_entry_index < location_catalog_entries_.Size(); - ++location_catalog_entry_index) { - DexRegisterLocation dex_register_location = - location_catalog_entries_.Get(location_catalog_entry_index); - size += DexRegisterLocationCatalog::EntrySize(dex_register_location); - } - return size; - } - - size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const { - // Size of the map in bytes. - size_t size = DexRegisterMap::kFixedSize; - // Add the live bit mask for the Dex register liveness. - size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers); - // Compute the size of the set of live Dex register entries. 
- size_t number_of_live_dex_registers = 0; - for (size_t dex_register_number = 0; - dex_register_number < entry.num_dex_registers; - ++dex_register_number) { - if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { - ++number_of_live_dex_registers; - } - } - size_t map_entries_size_in_bits = - DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size()) - * number_of_live_dex_registers; - size_t map_entries_size_in_bytes = - RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; - size += map_entries_size_in_bytes; - return size; - } - - // Compute the size of all the Dex register maps. - size_t ComputeDexRegisterMapsSize() { - size_t size = 0; - for (size_t i = 0; i < stack_maps_.Size(); ++i) { - if (FindEntryWithTheSameDexMap(i) == kNoSameDexMapFound) { - // Entries with the same dex map will have the same offset. - size += ComputeDexRegisterMapSize(stack_maps_.Get(i)); - } - } - return size; - } - - // Compute the size of all the inline information pieces. - size_t ComputeInlineInfoSize() const { - return inline_infos_.Size() * InlineInfo::SingleEntrySize() - // For encoding the depth. - + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); - } + void BeginStackMapEntry(uint32_t dex_pc, + uint32_t native_pc_offset, + uint32_t register_mask, + BitVector* sp_mask, + uint32_t num_dex_registers, + uint8_t inlining_depth); + void EndStackMapEntry(); - size_t ComputeDexRegisterLocationCatalogStart() const { - return CodeInfo::kFixedSize; - } - - size_t ComputeStackMapsStart() const { - return ComputeDexRegisterLocationCatalogStart() + ComputeDexRegisterLocationCatalogSize(); - } - - size_t ComputeDexRegisterMapsStart() { - return ComputeStackMapsStart() + ComputeStackMapsSize(); - } - - size_t ComputeInlineInfoStart() { - return ComputeDexRegisterMapsStart() + ComputeDexRegisterMapsSize(); - } + void AddDexRegisterEntry(uint16_t dex_register, + DexRegisterLocation::Kind kind, + int32_t value); - void FillIn(MemoryRegion region) { - CodeInfo code_info(region); - DCHECK_EQ(region.size(), ComputeNeededSize()); - code_info.SetOverallSize(region.size()); + void AddInlineInfoEntry(uint32_t method_index); - size_t stack_mask_size = ComputeStackMaskSize(); - - size_t dex_register_map_size = ComputeDexRegisterMapsSize(); - size_t inline_info_size = ComputeInlineInfoSize(); - - MemoryRegion dex_register_locations_region = region.Subregion( - ComputeDexRegisterMapsStart(), - dex_register_map_size); - - MemoryRegion inline_infos_region = region.Subregion( - ComputeInlineInfoStart(), - inline_info_size); - - code_info.SetEncoding(inline_info_size, - dex_register_map_size, - dex_pc_max_, - native_pc_offset_max_, - register_mask_max_); - code_info.SetNumberOfStackMaps(stack_maps_.Size()); - code_info.SetStackMaskSize(stack_mask_size); - DCHECK_EQ(code_info.GetStackMapsSize(), ComputeStackMapsSize()); - - // Set the Dex register location catalog. - code_info.SetNumberOfDexRegisterLocationCatalogEntries( - location_catalog_entries_.Size()); - MemoryRegion dex_register_location_catalog_region = region.Subregion( - ComputeDexRegisterLocationCatalogStart(), - ComputeDexRegisterLocationCatalogSize()); - DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); - // Offset in `dex_register_location_catalog` where to store the next - // register location. 
- size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; - for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) { - DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i); - dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); - location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); - } - // Ensure we reached the end of the Dex registers location_catalog. - DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size()); - - uintptr_t next_dex_register_map_offset = 0; - uintptr_t next_inline_info_offset = 0; - for (size_t i = 0, e = stack_maps_.Size(); i < e; ++i) { - StackMap stack_map = code_info.GetStackMapAt(i); - StackMapEntry entry = stack_maps_.Get(i); - - stack_map.SetDexPc(code_info, entry.dex_pc); - stack_map.SetNativePcOffset(code_info, entry.native_pc_offset); - stack_map.SetRegisterMask(code_info, entry.register_mask); - if (entry.sp_mask != nullptr) { - stack_map.SetStackMask(code_info, *entry.sp_mask); - } - - if (entry.num_dex_registers == 0) { - // No dex map available. - stack_map.SetDexRegisterMapOffset(code_info, StackMap::kNoDexRegisterMap); - } else { - // Search for an entry with the same dex map. - size_t entry_with_same_map = FindEntryWithTheSameDexMap(i); - if (entry_with_same_map != kNoSameDexMapFound) { - // If we have a hit reuse the offset. - stack_map.SetDexRegisterMapOffset(code_info, - code_info.GetStackMapAt(entry_with_same_map).GetDexRegisterMapOffset(code_info)); - } else { - // New dex registers maps should be added to the stack map. - MemoryRegion register_region = - dex_register_locations_region.Subregion( - next_dex_register_map_offset, - ComputeDexRegisterMapSize(entry)); - next_dex_register_map_offset += register_region.size(); - DexRegisterMap dex_register_map(register_region); - stack_map.SetDexRegisterMapOffset( - code_info, register_region.start() - dex_register_locations_region.start()); - - // Set the live bit mask. - dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask); - - // Set the dex register location mapping data. - for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; - dex_register_number < entry.num_dex_registers; - ++dex_register_number) { - if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { - size_t location_catalog_entry_index = - dex_register_locations_.Get(entry.dex_register_locations_start_index - + index_in_dex_register_locations); - dex_register_map.SetLocationCatalogEntryIndex( - index_in_dex_register_locations, - location_catalog_entry_index, - entry.num_dex_registers, - location_catalog_entries_.Size()); - ++index_in_dex_register_locations; - } - } - } - } - - // Set the inlining info. - if (entry.inlining_depth != 0) { - MemoryRegion inline_region = inline_infos_region.Subregion( - next_inline_info_offset, - InlineInfo::kFixedSize + entry.inlining_depth * InlineInfo::SingleEntrySize()); - next_inline_info_offset += inline_region.size(); - InlineInfo inline_info(inline_region); - - // Currently relative to the dex register map. 
- stack_map.SetInlineDescriptorOffset( - code_info, inline_region.start() - dex_register_locations_region.start()); - - inline_info.SetDepth(entry.inlining_depth); - for (size_t j = 0; j < entry.inlining_depth; ++j) { - InlineInfoEntry inline_entry = inline_infos_.Get(j + entry.inline_infos_start_index); - inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index); - } - } else { - if (inline_info_size != 0) { - stack_map.SetInlineDescriptorOffset(code_info, StackMap::kNoInlineInfo); - } - } - } - } - - void AddDexRegisterEntry(uint16_t dex_register, DexRegisterLocation::Kind kind, int32_t value) { - StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1); - DCHECK_LT(dex_register, entry.num_dex_registers); - - if (kind != DexRegisterLocation::Kind::kNone) { - // Ensure we only use non-compressed location kind at this stage. - DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) - << DexRegisterLocation::PrettyDescriptor(kind); - DexRegisterLocation location(kind, value); - - // Look for Dex register `location` in the location catalog (using the - // companion hash map of locations to indices). Use its index if it - // is already in the location catalog. If not, insert it (in the - // location catalog and the hash map) and use the newly created index. - auto it = location_catalog_entries_indices_.Find(location); - if (it != location_catalog_entries_indices_.end()) { - // Retrieve the index from the hash map. - dex_register_locations_.Add(it->second); - } else { - // Create a new entry in the location catalog and the hash map. - size_t index = location_catalog_entries_.Size(); - location_catalog_entries_.Add(location); - dex_register_locations_.Add(index); - location_catalog_entries_indices_.Insert(std::make_pair(location, index)); - } - - entry.live_dex_registers_mask->SetBit(dex_register); - entry.dex_register_map_hash += - (1 << (dex_register % (sizeof(entry.dex_register_map_hash) * kBitsPerByte))); - entry.dex_register_map_hash += static_cast<uint32_t>(value); - entry.dex_register_map_hash += static_cast<uint32_t>(kind); - stack_maps_.Put(stack_maps_.Size() - 1, entry); - } - } + // Prepares the stream to fill in a memory region. Must be called before FillIn. + // Returns the size (in bytes) needed to store this stream. + size_t PrepareForFillIn(); + void FillIn(MemoryRegion region); private: - // Returns the index of an entry with the same dex register map - // or kNoSameDexMapFound if no such entry exists. - size_t FindEntryWithTheSameDexMap(size_t entry_index) { - StackMapEntry entry = stack_maps_.Get(entry_index); - auto entries_it = dex_map_hash_to_stack_map_indices_.find(entry.dex_register_map_hash); - if (entries_it == dex_map_hash_to_stack_map_indices_.end()) { - // We don't have a perfect hash functions so we need a list to collect all stack maps - // which might have the same dex register map. - GrowableArray<uint32_t> stack_map_indices(allocator_, 1); - stack_map_indices.Add(entry_index); - dex_map_hash_to_stack_map_indices_.Put(entry.dex_register_map_hash, stack_map_indices); - return kNoSameDexMapFound; - } - - // TODO: We don't need to add ourselves to the map if we can guarantee that - // FindEntryWithTheSameDexMap is called just once per stack map entry. - // A good way to do this is to cache the offset in the stack map entry. This - // is easier to do if we add markers when the stack map constructions begins - // and when it ends. 
+ size_t ComputeDexRegisterLocationCatalogSize() const; + size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const; + size_t ComputeDexRegisterMapsSize() const; + size_t ComputeInlineInfoSize() const; - // We might have collisions, so we need to check whether or not we should - // add the entry to the map. `needs_to_be_added` keeps track of this. - bool needs_to_be_added = true; - size_t result = kNoSameDexMapFound; - for (size_t i = 0; i < entries_it->second.Size(); i++) { - size_t test_entry_index = entries_it->second.Get(i); - if (test_entry_index == entry_index) { - needs_to_be_added = false; - } else if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), entry)) { - result = test_entry_index; - needs_to_be_added = false; - break; - } - } - if (needs_to_be_added) { - entries_it->second.Add(entry_index); - } - return result; - } - - bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const { - if (a.live_dex_registers_mask == nullptr && b.live_dex_registers_mask == nullptr) { - return true; - } - if (a.live_dex_registers_mask == nullptr || b.live_dex_registers_mask == nullptr) { - return false; - } - if (a.num_dex_registers != b.num_dex_registers) { - return false; - } - - int index_in_dex_register_locations = 0; - for (uint32_t i = 0; i < a.num_dex_registers; i++) { - if (a.live_dex_registers_mask->IsBitSet(i) != b.live_dex_registers_mask->IsBitSet(i)) { - return false; - } - if (a.live_dex_registers_mask->IsBitSet(i)) { - size_t a_loc = dex_register_locations_.Get( - a.dex_register_locations_start_index + index_in_dex_register_locations); - size_t b_loc = dex_register_locations_.Get( - b.dex_register_locations_start_index + index_in_dex_register_locations); - if (a_loc != b_loc) { - return false; - } - ++index_in_dex_register_locations; - } - } - return true; - } + // Returns the index of an entry with the same dex register map as the current_entry, + // or kNoSameDexMapFound if no such entry exists. + size_t FindEntryWithTheSameDexMap(); + bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const; ArenaAllocator* allocator_; GrowableArray<StackMapEntry> stack_maps_; @@ -476,8 +143,7 @@ class StackMapStream : public ValueObject { DexRegisterLocationHashFn> LocationCatalogEntriesIndices; LocationCatalogEntriesIndices location_catalog_entries_indices_; - // A set of concatenated maps of Dex register locations indices to - // `location_catalog_entries_`. + // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`. 
GrowableArray<size_t> dex_register_locations_; GrowableArray<InlineInfoEntry> inline_infos_; int stack_mask_max_; @@ -488,6 +154,18 @@ class StackMapStream : public ValueObject { ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_; + StackMapEntry current_entry_; + size_t stack_mask_size_; + size_t inline_info_size_; + size_t dex_register_maps_size_; + size_t stack_maps_size_; + size_t dex_register_location_catalog_size_; + size_t dex_register_location_catalog_start_; + size_t stack_maps_start_; + size_t dex_register_maps_start_; + size_t inline_infos_start_; + size_t needed_size_; + static constexpr uint32_t kNoSameDexMapFound = -1; DISALLOW_COPY_AND_ASSIGN(StackMapStream); diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 8d160bc81e..3291a77021 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -40,11 +40,12 @@ TEST(StackMapTest, Test1) { ArenaBitVector sp_mask(&arena, 0, false); size_t number_of_dex_registers = 2; - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location. stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Short location. + stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -123,20 +124,22 @@ TEST(StackMapTest, Test2) { sp_mask1.SetBit(2); sp_mask1.SetBit(4); size_t number_of_dex_registers = 2; - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2); stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location. stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. stream.AddInlineInfoEntry(42); stream.AddInlineInfoEntry(82); + stream.EndStackMapEntry(); ArenaBitVector sp_mask2(&arena, 0, true); sp_mask2.SetBit(3); sp_mask1.SetBit(8); - stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); + stream.BeginStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); stream.AddDexRegisterEntry(0, Kind::kInRegister, 18); // Short location. stream.AddDexRegisterEntry(1, Kind::kInFpuRegister, 3); // Short location. + stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -273,11 +276,12 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 2; - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.AddDexRegisterEntry(0, Kind::kNone, 0); // No location. stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. 
+ stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -353,7 +357,7 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 1024; // Create the first stack map (and its Dex register map). - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8; for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) { // Use two different Dex register locations to populate this map, @@ -362,13 +366,15 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { // art::DexRegisterMap::SingleEntrySizeInBits). stream.AddDexRegisterEntry(i, Kind::kConstant, i % 2); // Short location. } + stream.EndStackMapEntry(); // Create the second stack map (and its Dex register map). - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); for (uint32_t i = 0; i < number_of_dex_registers; ++i) { stream.AddDexRegisterEntry(i, Kind::kConstant, 0); // Short location. } + stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -413,19 +419,22 @@ TEST(StackMapTest, TestShareDexRegisterMap) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 2; // First stack map. - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location. stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. + stream.EndStackMapEntry(); // Second stack map, which should share the same dex register map. - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location. stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. + stream.EndStackMapEntry(); // Third stack map (doesn't share the dex register map). - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.AddDexRegisterEntry(0, Kind::kInRegister, 2); // Short location. stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. 
+ stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -462,9 +471,10 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 0; - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); |
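The updated tests follow the new streaming protocol end to end: `BeginStackMapEntry`, per-entry `AddDexRegisterEntry`/`AddInlineInfoEntry` calls, `EndStackMapEntry`, then `PrepareForFillIn` to size the buffer passed to `FillIn`. The toy class below illustrates that Begin/End accumulation and the size-then-write split; it is a stand-in for demonstration only, not ART's `StackMapStream`.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

class ToyStream {
 public:
  void BeginEntry(uint32_t dex_pc) {
    assert(!in_entry_ && "EndEntry not called after BeginEntry");
    in_entry_ = true;
    current_ = {dex_pc, {}};
  }
  void AddRegister(int32_t value) {
    assert(in_entry_);
    current_.registers.push_back(value);
  }
  void EndEntry() {
    assert(in_entry_);
    entries_.push_back(current_);
    in_entry_ = false;
  }
  // Returns the byte size the caller must allocate before FillIn.
  size_t PrepareForFillIn() {
    prepared_size_ = sizeof(uint32_t);  // entry count
    for (const Entry& e : entries_) {
      prepared_size_ += sizeof(uint32_t) + e.registers.size() * sizeof(int32_t);
    }
    return prepared_size_;
  }
  void FillIn(uint8_t* region, size_t size) {
    assert(!in_entry_);
    assert(size == prepared_size_);
    (void)region;  // serialization of entries_ into region omitted for brevity
    (void)size;
  }

 private:
  struct Entry { uint32_t dex_pc; std::vector<int32_t> registers; };
  bool in_entry_ = false;
  Entry current_;
  std::vector<Entry> entries_;
  size_t prepared_size_ = 0;
};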