Diffstat (limited to 'compiler/optimizing')
40 files changed, 1916 insertions, 956 deletions
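The central behavioral change in the hunks below is the selection of a class-initialization ("clinit") check for static invokes in builder.cc, which every code generator then honors. A minimal sketch of that decision, assuming a hypothetical helper and parameter names (only the enumerator names match HInvokeStaticOrDirect::ClinitCheckRequirement from the patch):

    // Hypothetical summary of the builder's choice in BuildInvoke below; the
    // helper and its boolean parameters are illustrative, not ART API.
    enum class ClinitCheckRequirement { kNone, kExplicit, kImplicit };

    static ClinitCheckRequirement ChooseClinitCheck(bool referrer_is_subclass_of_declaring,
                                                    bool class_type_index_known,
                                                    bool declaring_class_initialized,
                                                    bool type_in_dex_cache) {
      if (referrer_is_subclass_of_declaring) {
        // The declaring class is the referrer or one of its superclasses, so it is
        // necessarily initialized before the call: no check is needed.
        return ClinitCheckRequirement::kNone;
      }
      if (class_type_index_known) {
        if (declaring_class_initialized && type_in_dex_cache) {
          return ClinitCheckRequirement::kNone;
        }
        // Emit HLoadClass + HClinitCheck and pass the check as the last input of
        // the HInvokeStaticOrDirect.
        return ClinitCheckRequirement::kExplicit;
      }
      // No usable type index: fall back to the runtime's implicit initialization.
      return ClinitCheckRequirement::kImplicit;
    }

In the explicit case the check becomes the invoke's last input; art::PrepareForRegisterAllocation prunes it before register allocation, which is why the baseline paths in each code generator remove it manually.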
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index 06328f2490..30c89f2d15 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -72,8 +72,8 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) { return graph->GetIntConstant(0); } } else { - // General case when 'cond' is another instruction of type boolean. - DCHECK_EQ(cond->GetType(), Primitive::Type::kPrimBoolean); + // General case when 'cond' is another instruction of type boolean, + // as verified by SSAChecker. return new (allocator) HBooleanNot(cond); } } @@ -120,8 +120,11 @@ void HBooleanSimplifier::Run() { phi->ReplaceWith(replacement); merge_block->RemovePhi(phi); - // Link the start/end blocks and remove empty branches. - graph_->MergeEmptyBranches(block, merge_block); + // Delete the true branch and merge the resulting chain of blocks + // 'block->false_block->merge_block' into one. + true_block->DisconnectAndDelete(); + block->MergeWith(false_block); + block->MergeWith(merge_block); // Remove the original condition if it is now unused. if (!if_condition->HasUses()) { diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 6511120794..3645f19f09 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1064,7 +1064,7 @@ class BCEVisitor : public HGraphVisitor { }; void BoundsCheckElimination::Run() { - if (!graph_->HasArrayAccesses()) { + if (!graph_->HasBoundsChecks()) { return; } diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 75cf1cf063..97be778dbd 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -43,7 +43,7 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - graph->SetHasArrayAccesses(true); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -148,7 +148,7 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - graph->SetHasArrayAccesses(true); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -220,7 +220,7 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - graph->SetHasArrayAccesses(true); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -292,7 +292,7 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - graph->SetHasArrayAccesses(true); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -365,7 +365,7 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator, int increment, IfCondition cond = kCondGE) { HGraph* graph = new (allocator) HGraph(allocator); - graph->SetHasArrayAccesses(true); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -502,7 +502,7 @@ static HGraph* 
BuildSSAGraph2(ArenaAllocator* allocator, int increment = -1, IfCondition cond = kCondLE) { HGraph* graph = new (allocator) HGraph(allocator); - graph->SetHasArrayAccesses(true); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -633,7 +633,7 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, int increment, IfCondition cond) { HGraph* graph = new (allocator) HGraph(allocator); - graph->SetHasArrayAccesses(true); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -744,7 +744,7 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator, int initial, IfCondition cond = kCondGE) { HGraph* graph = new (allocator) HGraph(allocator); - graph->SetHasArrayAccesses(true); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -869,7 +869,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - graph->SetHasArrayAccesses(true); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 818d671b5b..96e08fd24c 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -21,6 +21,7 @@ #include "class_linker.h" #include "dex_file-inl.h" #include "dex_instruction-inl.h" +#include "dex/verified_method.h" #include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "mirror/class_loader.h" @@ -587,7 +588,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, const char* descriptor = dex_file_->StringDataByIdx(proto_id.shorty_idx_); Primitive::Type return_type = Primitive::GetType(descriptor[0]); bool is_instance_call = invoke_type != kStatic; - const size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 0 : 1); + size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 0 : 1); MethodReference target_method(dex_file_, method_idx); uintptr_t direct_code; @@ -605,7 +606,15 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, } DCHECK(optimized_invoke_type != kSuper); + // By default, consider that the called method implicitly requires + // an initialization check of its declaring method. + HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement = + HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit; + // Potential class initialization check, in the case of a static method call. 
+ HClinitCheck* clinit_check = nullptr; + HInvoke* invoke = nullptr; + if (optimized_invoke_type == kVirtual) { invoke = new (arena_) HInvokeVirtual( arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index); @@ -620,9 +629,76 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, bool is_recursive = (target_method.dex_method_index == dex_compilation_unit_->GetDexMethodIndex()); DCHECK(!is_recursive || (target_method.dex_file == dex_compilation_unit_->GetDexFile())); + + if (optimized_invoke_type == kStatic) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<4> hs(soa.Self()); + Handle<mirror::DexCache> dex_cache(hs.NewHandle( + dex_compilation_unit_->GetClassLinker()->FindDexCache( + *dex_compilation_unit_->GetDexFile()))); + Handle<mirror::ClassLoader> class_loader(hs.NewHandle( + soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); + mirror::ArtMethod* resolved_method = compiler_driver_->ResolveMethod( + soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, + optimized_invoke_type); + + if (resolved_method == nullptr) { + MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedMethod); + return false; + } + + const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); + Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( + outer_compilation_unit_->GetClassLinker()->FindDexCache(outer_dex_file))); + Handle<mirror::Class> referrer_class(hs.NewHandle(GetOutermostCompilingClass())); + + // The index at which the method's class is stored in the DexCache's type array. + uint32_t storage_index = DexFile::kDexNoIndex; + bool is_referrer_class = (resolved_method->GetDeclaringClass() == referrer_class.Get()); + if (is_referrer_class) { + storage_index = referrer_class->GetDexTypeIndex(); + } else if (outer_dex_cache.Get() == dex_cache.Get()) { + // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer. + compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(), + referrer_class.Get(), + resolved_method, + method_idx, + &storage_index); + } + + if (referrer_class.Get()->IsSubClass(resolved_method->GetDeclaringClass())) { + // If the referrer class is the declaring class or a subclass + // of the declaring class, no class initialization is needed + // before the static method call. + clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; + } else if (storage_index != DexFile::kDexNoIndex) { + // If the method's class type index is available, check + // whether we should add an explicit class initialization + // check for its declaring class before the static method call. + + // TODO: find out why this check is needed. 
+ bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( + *outer_compilation_unit_->GetDexFile(), storage_index); + bool is_initialized = + resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache; + + if (is_initialized) { + clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; + } else { + clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; + HLoadClass* load_class = + new (arena_) HLoadClass(storage_index, is_referrer_class, dex_pc); + current_block_->AddInstruction(load_class); + clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); + current_block_->AddInstruction(clinit_check); + ++number_of_arguments; + } + } + } + invoke = new (arena_) HInvokeStaticOrDirect( arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index, - is_recursive, invoke_type, optimized_invoke_type); + is_recursive, invoke_type, optimized_invoke_type, clinit_check_requirement); } size_t start_index = 0; @@ -655,6 +731,12 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, } } + if (clinit_check_requirement == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit) { + // Add the class initialization check as last input of `invoke`. + DCHECK(clinit_check != nullptr); + invoke->SetArgumentAt(argument_index++, clinit_check); + } + DCHECK_EQ(argument_index, number_of_arguments); current_block_->AddInstruction(invoke); latest_result_ = invoke; @@ -732,7 +814,6 @@ bool HGraphBuilder::IsOutermostCompilingClass(uint16_t type_index) const { return compiling_class.Get() == cls.Get(); } - bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put) { @@ -764,7 +845,7 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, if (is_referrer_class) { storage_index = referrer_class->GetDexTypeIndex(); } else if (outer_dex_cache.Get() != dex_cache.Get()) { - // The compiler driver cannot currently understand multple dex caches involved. Just bailout. + // The compiler driver cannot currently understand multiple dex caches involved. Just bailout. 
return false; } else { std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField( @@ -882,7 +963,7 @@ void HGraphBuilder::BuildArrayAccess(const Instruction& instruction, current_block_->AddInstruction(new (arena_) HArrayGet(object, index, anticipated_type)); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } - graph_->SetHasArrayAccesses(true); + graph_->SetHasBoundsChecks(true); } void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc, @@ -984,6 +1065,7 @@ void HGraphBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t default: LOG(FATAL) << "Unknown element width for " << payload->element_width; } + graph_->SetHasBoundsChecks(true); } void HGraphBuilder::BuildFillWideArrayData(HInstruction* object, diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 8ab759d393..5163395cac 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -612,7 +612,7 @@ void CodeGenerator::BuildVMapTable(std::vector<uint8_t>* data) const { } void CodeGenerator::BuildStackMaps(std::vector<uint8_t>* data) { - uint32_t size = stack_map_stream_.ComputeNeededSize(); + uint32_t size = stack_map_stream_.PrepareForFillIn(); data->resize(size); MemoryRegion region(data->data(), size); stack_map_stream_.FillIn(region); @@ -654,7 +654,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, if (instruction == nullptr) { // For stack overflow checks. - stack_map_stream_.AddStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, inlining_depth); + stack_map_stream_.BeginStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, inlining_depth); + stack_map_stream_.EndStackMapEntry(); return; } LocationSummary* locations = instruction->GetLocations(); @@ -672,12 +673,12 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } // The register mask must be a subset of callee-save registers. DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); - stack_map_stream_.AddStackMapEntry(dex_pc, - pc_info.native_pc, - register_mask, - locations->GetStackMask(), - environment_size, - inlining_depth); + stack_map_stream_.BeginStackMapEntry(dex_pc, + pc_info.native_pc, + register_mask, + locations->GetStackMask(), + environment_size, + inlining_depth); // Walk over the environment, and record the location of dex registers. 
for (size_t i = 0; i < environment_size; ++i) { @@ -823,11 +824,14 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, LOG(FATAL) << "Unexpected kind " << location.GetKind(); } } + stack_map_stream_.EndStackMapEntry(); } bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) { HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves(); - return (first_next_not_move != nullptr) && first_next_not_move->CanDoImplicitNullCheck(); + + return (first_next_not_move != nullptr) + && first_next_not_move->CanDoImplicitNullCheckOn(null_check->InputAt(0)); } void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) { @@ -842,7 +846,7 @@ void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) { return; } - if (!instr->CanDoImplicitNullCheck()) { + if (!instr->CanDoImplicitNullCheckOn(instr->InputAt(0))) { return; } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 38fa04315a..01748a9f5c 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -176,7 +176,6 @@ class LoadClassSlowPathARM : public SlowPathCodeARM { InvokeRuntimeCallingConvention calling_convention; __ LoadImmediate(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex()); - arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType); @@ -222,7 +221,6 @@ class LoadStringSlowPathARM : public SlowPathCodeARM { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); @@ -1243,6 +1241,14 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { } void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // Explicit clinit checks triggered by static invokes must have been + // pruned by art::PrepareForRegisterAllocation, but this step is not + // run in baseline. So we remove them manually here if we find them. + // TODO: Instead of this local workaround, address this properly. + if (invoke->IsStaticWithExplicitClinitCheck()) { + invoke->RemoveClinitCheckOrLoadClassAsLastInput(); + } + IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), codegen_->GetInstructionSetFeatures()); if (intrinsic.TryDispatch(invoke)) { @@ -1267,6 +1273,10 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) } void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // Explicit clinit checks triggered by static invokes must have been + // pruned by art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; } @@ -3898,9 +3908,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { SlowPathCodeARM* slow_path = nullptr; // Return 0 if `obj` is null. - // TODO: avoid this check if we know obj is not null. - __ cmp(obj, ShifterOperand(0)); - __ b(&zero, EQ); + // avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ cmp(obj, ShifterOperand(0)); + __ b(&zero, EQ); + } // Compare the class of `obj` with `cls`. 
__ LoadFromOffset(kLoadWord, out, obj, class_offset); __ cmp(out, ShifterOperand(cls)); @@ -3919,8 +3931,12 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ LoadImmediate(out, 1); __ b(&done); } - __ Bind(&zero); - __ LoadImmediate(out, 0); + + if (instruction->MustDoNullCheck() || instruction->IsClassFinal()) { + __ Bind(&zero); + __ LoadImmediate(out, 0); + } + if (slow_path != nullptr) { __ Bind(slow_path->GetExitLabel()); } @@ -3946,9 +3962,11 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); - // TODO: avoid this check if we know obj is not null. - __ cmp(obj, ShifterOperand(0)); - __ b(slow_path->GetExitLabel(), EQ); + // avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ cmp(obj, ShifterOperand(0)); + __ b(slow_path->GetExitLabel(), EQ); + } // Compare the class of `obj` with `cls`. __ LoadFromOffset(kLoadWord, temp, obj, class_offset); __ cmp(temp, ShifterOperand(cls)); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 76e2ec6f42..dada4ce5bd 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -173,14 +173,13 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { InvokeRuntimeCallingConvention calling_convention; __ Mov(calling_convention.GetRegisterAt(0).W(), cls_->GetTypeIndex()); - arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W()); int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType); arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this); if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t, mirror::ArtMethod*>(); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t, mirror::ArtMethod*>(); + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); } // Move the class to the desired location. @@ -225,11 +224,10 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W()); __ Mov(calling_convention.GetRegisterAt(0).W(), instruction_->GetStringIndex()); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickResolveString, void*, uint32_t, mirror::ArtMethod*>(); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); Primitive::Type type = instruction_->GetType(); arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type); @@ -1353,16 +1351,16 @@ void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { - Primitive::Type value_type = instruction->GetComponentType(); - bool is_object = value_type == Primitive::kPrimNot; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, is_object ? 
LocationSummary::kCall : LocationSummary::kNoCall); - if (is_object) { + if (instruction->NeedsTypeCheck()) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); } else { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { @@ -1375,33 +1373,42 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { Primitive::Type value_type = instruction->GetComponentType(); - if (value_type == Primitive::kPrimNot) { + LocationSummary* locations = instruction->GetLocations(); + bool needs_runtime_call = locations->WillCall(); + + if (needs_runtime_call) { codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc(), nullptr); CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } else { - LocationSummary* locations = instruction->GetLocations(); Register obj = InputRegisterAt(instruction, 0); CPURegister value = InputCPURegisterAt(instruction, 2); Location index = locations->InAt(1); size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value(); MemOperand destination = HeapOperand(obj); MacroAssembler* masm = GetVIXLAssembler(); - UseScratchRegisterScope temps(masm); BlockPoolsScope block_pools(masm); + { + // We use a block to end the scratch scope before the write barrier, thus + // freeing the temporary registers so they can be used in `MarkGCCard`. + UseScratchRegisterScope temps(masm); + + if (index.IsConstant()) { + offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); + destination = HeapOperand(obj, offset); + } else { + Register temp = temps.AcquireSameSizeAs(obj); + Register index_reg = InputRegisterAt(instruction, 1); + __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(value_type))); + destination = HeapOperand(temp, offset); + } - if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); - destination = HeapOperand(obj, offset); - } else { - Register temp = temps.AcquireSameSizeAs(obj); - Register index_reg = InputRegisterAt(instruction, 1); - __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(value_type))); - destination = HeapOperand(temp, offset); + codegen_->Store(value_type, value, destination); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + if (CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue())) { + codegen_->MarkGCCard(obj, value.W()); } - - codegen_->Store(value_type, value, destination); - codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -1443,8 +1450,10 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { instruction, locations->InAt(1), LocationFrom(obj_cls), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); - // TODO: avoid this check if we know obj is not null. 
- __ Cbz(obj, slow_path->GetExitLabel()); + // Avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ Cbz(obj, slow_path->GetExitLabel()); + } // Compare the class of `obj` with `cls`. __ Ldr(obj_cls, HeapOperand(obj, mirror::Object::ClassOffset())); __ Cmp(obj_cls, cls); @@ -1846,9 +1855,11 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { vixl::Label done; // Return 0 if `obj` is null. - // TODO: Avoid this check if we know `obj` is not null. - __ Mov(out, 0); - __ Cbz(obj, &done); + // Avoid null check if we know `obj` is not null. + if (instruction->MustDoNullCheck()) { + __ Mov(out, 0); + __ Cbz(obj, &done); + } // Compare the class of `obj` with `cls`. __ Ldr(out, HeapOperand(obj, mirror::Object::ClassOffset())); @@ -1957,6 +1968,14 @@ void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // Explicit clinit checks triggered by static invokes must have been + // pruned by art::PrepareForRegisterAllocation, but this step is not + // run in baseline. So we remove them manually here if we find them. + // TODO: Instead of this local workaround, address this properly. + if (invoke->IsStaticWithExplicitClinitCheck()) { + invoke->RemoveClinitCheckOrLoadClassAsLastInput(); + } + IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena()); if (intrinsic.TryDispatch(invoke)) { return; @@ -2007,6 +2026,10 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok } void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // Explicit clinit checks triggered by static invokes must have been + // pruned by art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 87c74fb0cd..04999bedb0 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -174,7 +174,6 @@ class LoadStringSlowPathX86 : public SlowPathCodeX86 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction_->GetStringIndex())); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pResolveString))); RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); @@ -208,7 +207,6 @@ class LoadClassSlowPathX86 : public SlowPathCodeX86 { InvokeRuntimeCallingConvention calling_convention; __ movl(calling_convention.GetRegisterAt(0), Immediate(cls_->GetTypeIndex())); - x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); __ fs()->call(Address::Absolute(do_clinit_ ? 
QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeStaticStorage) : QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeType))); @@ -877,7 +875,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio if (rhs.IsRegister()) { __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>()); } else if (rhs.IsConstant()) { - int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue(); + int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); if (constant == 0) { __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); } else { @@ -1196,6 +1194,14 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { } void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // Explicit clinit checks triggered by static invokes must have been + // pruned by art::PrepareForRegisterAllocation, but this step is not + // run in baseline. So we remove them manually here if we find them. + // TODO: Instead of this local workaround, address this properly. + if (invoke->IsStaticWithExplicitClinitCheck()) { + invoke->RemoveClinitCheckOrLoadClassAsLastInput(); + } + IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; @@ -1214,6 +1220,10 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) } void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // Explicit clinit checks triggered by static invokes must have been + // pruned by art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; } @@ -1556,10 +1566,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimLong: // Processing a Dex `long-to-float' instruction. - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); + locations->SetInAt(0, Location::Any()); + locations->SetOut(Location::Any()); break; case Primitive::kPrimDouble: @@ -1589,10 +1597,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimLong: // Processing a Dex `long-to-double' instruction. - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); + locations->SetInAt(0, Location::Any()); + locations->SetOut(Location::Any()); break; case Primitive::kPrimFloat: @@ -1813,37 +1819,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimLong: { // Processing a Dex `long-to-float' instruction. - Register low = in.AsRegisterPairLow<Register>(); - Register high = in.AsRegisterPairHigh<Register>(); - XmmRegister result = out.AsFpuRegister<XmmRegister>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); - - // Operations use doubles for precision reasons (each 32-bit - // half of a long fits in the 53-bit mantissa of a double, - // but not in the 24-bit mantissa of a float). This is - // especially important for the low bits. The result is - // eventually converted to float. 
- - // low = low - 2^31 (to prevent bit 31 of `low` to be - // interpreted as a sign bit) - __ subl(low, Immediate(0x80000000)); - // temp = int-to-double(high) - __ cvtsi2sd(temp, high); - // temp = temp * 2^32 - __ LoadLongConstant(constant, k2Pow32EncodingForDouble); - __ mulsd(temp, constant); - // result = int-to-double(low) - __ cvtsi2sd(result, low); - // result = result + 2^31 (restore the original value of `low`) - __ LoadLongConstant(constant, k2Pow31EncodingForDouble); - __ addsd(result, constant); - // result = result + temp - __ addsd(result, temp); - // result = double-to-float(result) - __ cvtsd2ss(result, result); - // Restore low. - __ addl(low, Immediate(0x80000000)); + size_t adjustment = 0; + + // Create stack space for the call to + // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below. + // TODO: enhance register allocator to ask for stack temporaries. + if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) { + adjustment = Primitive::ComponentSize(Primitive::kPrimLong); + __ subl(ESP, Immediate(adjustment)); + } + + // Load the value to the FP stack, using temporaries if needed. + PushOntoFPStack(in, 0, adjustment, false, true); + + if (out.IsStackSlot()) { + __ fstps(Address(ESP, out.GetStackIndex() + adjustment)); + } else { + __ fstps(Address(ESP, 0)); + Location stack_temp = Location::StackSlot(0); + codegen_->Move32(out, stack_temp); + } + + // Remove the temporary stack space we allocated. + if (adjustment != 0) { + __ addl(ESP, Immediate(adjustment)); + } break; } @@ -1872,29 +1872,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimLong: { // Processing a Dex `long-to-double' instruction. - Register low = in.AsRegisterPairLow<Register>(); - Register high = in.AsRegisterPairHigh<Register>(); - XmmRegister result = out.AsFpuRegister<XmmRegister>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); - - // low = low - 2^31 (to prevent bit 31 of `low` to be - // interpreted as a sign bit) - __ subl(low, Immediate(0x80000000)); - // temp = int-to-double(high) - __ cvtsi2sd(temp, high); - // temp = temp * 2^32 - __ LoadLongConstant(constant, k2Pow32EncodingForDouble); - __ mulsd(temp, constant); - // result = int-to-double(low) - __ cvtsi2sd(result, low); - // result = result + 2^31 (restore the original value of `low`) - __ LoadLongConstant(constant, k2Pow31EncodingForDouble); - __ addsd(result, constant); - // result = result + temp - __ addsd(result, temp); - // Restore low. - __ addl(low, Immediate(0x80000000)); + size_t adjustment = 0; + + // Create stack space for the call to + // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below. + // TODO: enhance register allocator to ask for stack temporaries. + if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) { + adjustment = Primitive::ComponentSize(Primitive::kPrimLong); + __ subl(ESP, Immediate(adjustment)); + } + + // Load the value to the FP stack, using temporaries if needed. + PushOntoFPStack(in, 0, adjustment, false, true); + + if (out.IsDoubleStackSlot()) { + __ fstpl(Address(ESP, out.GetStackIndex() + adjustment)); + } else { + __ fstpl(Address(ESP, 0)); + Location stack_temp = Location::DoubleStackSlot(0); + codegen_->Move64(out, stack_temp); + } + + // Remove the temporary stack space we allocated. 
+ if (adjustment != 0) { + __ addl(ESP, Immediate(adjustment)); + } break; } @@ -2234,24 +2236,43 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { } } -void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, uint32_t temp_offset, - uint32_t stack_adjustment, bool is_float) { +void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, + uint32_t temp_offset, + uint32_t stack_adjustment, + bool is_fp, + bool is_wide) { if (source.IsStackSlot()) { - DCHECK(is_float); - __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + DCHECK(!is_wide); + if (is_fp) { + __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else { + __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } } else if (source.IsDoubleStackSlot()) { - DCHECK(!is_float); - __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + DCHECK(is_wide); + if (is_fp) { + __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else { + __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } } else { // Write the value to the temporary location on the stack and load to FP stack. - if (is_float) { + if (!is_wide) { Location stack_temp = Location::StackSlot(temp_offset); codegen_->Move32(stack_temp, source); - __ flds(Address(ESP, temp_offset)); + if (is_fp) { + __ flds(Address(ESP, temp_offset)); + } else { + __ filds(Address(ESP, temp_offset)); + } } else { Location stack_temp = Location::DoubleStackSlot(temp_offset); codegen_->Move64(stack_temp, source); - __ fldl(Address(ESP, temp_offset)); + if (is_fp) { + __ fldl(Address(ESP, temp_offset)); + } else { + __ fildl(Address(ESP, temp_offset)); + } } } } @@ -2270,8 +2291,9 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { __ subl(ESP, Immediate(2 * elem_size)); // Load the values to the FP stack in reverse order, using temporaries if needed. - PushOntoFPStack(second, elem_size, 2 * elem_size, is_float); - PushOntoFPStack(first, 0, 2 * elem_size, is_float); + const bool is_wide = !is_float; + PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp */ true, is_wide); + PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide); // Loop doing FPREM until we stabilize. Label retry; @@ -3797,7 +3819,7 @@ void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (instruction->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } @@ -3809,16 +3831,38 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { Location length_loc = locations->InAt(1); SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86(instruction, index_loc, length_loc); - codegen_->AddSlowPath(slow_path); - Register length = length_loc.AsRegister<Register>(); - if (index_loc.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); - __ cmpl(length, Immediate(value)); + if (length_loc.IsConstant()) { + int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant()); + if (index_loc.IsConstant()) { + // BCE will remove the bounds check if we are guarenteed to pass. 
+ int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + if (index < 0 || index >= length) { + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + } else { + // Some optimization after BCE may have generated this, and we should not + // generate a bounds check if it is a valid range. + } + return; + } + + // We have to reverse the jump condition because the length is the constant. + Register index_reg = index_loc.AsRegister<Register>(); + __ cmpl(index_reg, Immediate(length)); + codegen_->AddSlowPath(slow_path); + __ j(kAboveEqual, slow_path->GetEntryLabel()); } else { - __ cmpl(length, index_loc.AsRegister<Register>()); + Register length = length_loc.AsRegister<Register>(); + if (index_loc.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + __ cmpl(length, Immediate(value)); + } else { + __ cmpl(length, index_loc.AsRegister<Register>()); + } + codegen_->AddSlowPath(slow_path); + __ j(kBelowEqual, slow_path->GetEntryLabel()); } - __ j(kBelowEqual, slow_path->GetEntryLabel()); } void LocationsBuilderX86::VisitTemporary(HTemporary* temp) { @@ -4238,9 +4282,11 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { SlowPathCodeX86* slow_path = nullptr; // Return 0 if `obj` is null. - // TODO: avoid this check if we know obj is not null. - __ testl(obj, obj); - __ j(kEqual, &zero); + // Avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ testl(obj, obj); + __ j(kEqual, &zero); + } __ movl(out, Address(obj, class_offset)); // Compare the class of `obj` with `cls`. if (cls.IsRegister()) { @@ -4265,8 +4311,12 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ movl(out, Immediate(1)); __ jmp(&done); } - __ Bind(&zero); - __ movl(out, Immediate(0)); + + if (instruction->MustDoNullCheck() || instruction->IsClassFinal()) { + __ Bind(&zero); + __ movl(out, Immediate(0)); + } + if (slow_path != nullptr) { __ Bind(slow_path->GetExitLabel()); } @@ -4291,11 +4341,13 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); - // TODO: avoid this check if we know obj is not null. - __ testl(obj, obj); - __ j(kEqual, slow_path->GetExitLabel()); - __ movl(temp, Address(obj, class_offset)); + // Avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ testl(obj, obj); + __ j(kEqual, slow_path->GetExitLabel()); + } + __ movl(temp, Address(obj, class_offset)); // Compare the class of `obj` with `cls`. if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<Register>()); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 07476c6850..8bd3cd3585 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -174,8 +174,10 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not. + // `is_wide` specifies whether it is long/double or not. 
void PushOntoFPStack(Location source, uint32_t temp_offset, - uint32_t stack_adjustment, bool is_float); + uint32_t stack_adjustment, bool is_fp, bool is_wide); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index d8d2ae381b..5ce932928b 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -197,7 +197,6 @@ class LoadClassSlowPathX86_64 : public SlowPathCodeX86_64 { InvokeRuntimeCallingConvention calling_convention; __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex())); - x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); __ gs()->call(Address::Absolute((do_clinit_ ? QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeStaticStorage) : QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeType)) , true)); @@ -244,7 +243,6 @@ class LoadStringSlowPathX86_64 : public SlowPathCodeX86_64 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction_->GetStringIndex())); __ gs()->call(Address::Absolute( @@ -1023,14 +1021,14 @@ void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { switch (compare->InputAt(0)->GetType()) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(compare->InputAt(1))); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::RequiresRegister()); break; } @@ -1052,24 +1050,46 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { CpuRegister left_reg = left.AsRegister<CpuRegister>(); if (right.IsConstant()) { int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); - DCHECK(IsInt<32>(value)); - if (value == 0) { - __ testq(left_reg, left_reg); + if (IsInt<32>(value)) { + if (value == 0) { + __ testq(left_reg, left_reg); + } else { + __ cmpq(left_reg, Immediate(static_cast<int32_t>(value))); + } } else { - __ cmpq(left_reg, Immediate(static_cast<int32_t>(value))); + // Value won't fit in an int. + __ cmpq(left_reg, codegen_->LiteralInt64Address(value)); } + } else if (right.IsDoubleStackSlot()) { + __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); } else { __ cmpq(left_reg, right.AsRegister<CpuRegister>()); } break; } case Primitive::kPrimFloat: { - __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); + XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); + if (right.IsConstant()) { + float value = right.GetConstant()->AsFloatConstant()->GetValue(); + __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value)); + } else if (right.IsStackSlot()) { + __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); + } else { + __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>()); + } __ j(kUnordered, compare->IsGtBias() ? 
&greater : &less); break; } case Primitive::kPrimDouble: { - __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); + XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); + if (right.IsConstant()) { + double value = right.GetConstant()->AsDoubleConstant()->GetValue(); + __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value)); + } else if (right.IsDoubleStackSlot()) { + __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); + } else { + __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>()); + } __ j(kUnordered, compare->IsGtBias() ? &greater : &less); break; } @@ -1178,8 +1198,7 @@ void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: - locations->SetInAt(0, - Location::FpuRegisterLocation(XMM0)); + locations->SetInAt(0, Location::FpuRegisterLocation(XMM0)); break; default: @@ -1270,6 +1289,14 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // Explicit clinit checks triggered by static invokes must have been + // pruned by art::PrepareForRegisterAllocation, but this step is not + // run in baseline. So we remove them manually here if we find them. + // TODO: Instead of this local workaround, address this properly. + if (invoke->IsStaticWithExplicitClinitCheck()) { + invoke->RemoveClinitCheckOrLoadClassAsLastInput(); + } + IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; @@ -1288,6 +1315,10 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codeg } void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // Explicit clinit checks triggered by static invokes must have been + // pruned by art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; } @@ -1419,7 +1450,6 @@ void LocationsBuilderX86_64::VisitNeg(HNeg* neg) { case Primitive::kPrimDouble: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); break; @@ -1447,26 +1477,22 @@ void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) { case Primitive::kPrimFloat: { DCHECK(in.Equals(out)); - CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>(); - XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); // Implement float negation with an exclusive or with value // 0x80000000 (mask for bit 31, representing the sign of a // single-precision floating-point number). - __ movq(constant, Immediate(INT64_C(0x80000000))); - __ movd(mask, constant); + __ movss(mask, codegen_->LiteralInt32Address(0x80000000)); __ xorps(out.AsFpuRegister<XmmRegister>(), mask); break; } case Primitive::kPrimDouble: { DCHECK(in.Equals(out)); - CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>(); - XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); // Implement double negation with an exclusive or with value // 0x8000000000000000 (mask for bit 63, representing the sign of // a double-precision floating-point number). 
- __ movq(constant, Immediate(INT64_C(0x8000000000000000))); - __ movd(mask, constant); + __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000))); __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); break; } @@ -1613,19 +1639,19 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimInt: case Primitive::kPrimChar: // Processing a Dex `int-to-float' instruction. - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); break; case Primitive::kPrimLong: // Processing a Dex `long-to-float' instruction. - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); break; case Primitive::kPrimDouble: // Processing a Dex `double-to-float' instruction. - locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -1644,19 +1670,19 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimInt: case Primitive::kPrimChar: // Processing a Dex `int-to-double' instruction. - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); break; case Primitive::kPrimLong: // Processing a Dex `long-to-double' instruction. - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); break; case Primitive::kPrimFloat: // Processing a Dex `float-to-double' instruction. - locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -1910,17 +1936,56 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver case Primitive::kPrimInt: case Primitive::kPrimChar: // Processing a Dex `int-to-float' instruction. - __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); + if (in.IsRegister()) { + __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); + } else if (in.IsConstant()) { + int32_t v = in.GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dest = out.AsFpuRegister<XmmRegister>(); + if (v == 0) { + __ xorps(dest, dest); + } else { + __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v))); + } + } else { + __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex()), false); + } break; case Primitive::kPrimLong: // Processing a Dex `long-to-float' instruction. - __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); + if (in.IsRegister()) { + __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); + } else if (in.IsConstant()) { + int64_t v = in.GetConstant()->AsLongConstant()->GetValue(); + XmmRegister dest = out.AsFpuRegister<XmmRegister>(); + if (v == 0) { + __ xorps(dest, dest); + } else { + __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v))); + } + } else { + __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex()), true); + } break; case Primitive::kPrimDouble: // Processing a Dex `double-to-float' instruction. 
- __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); + if (in.IsFpuRegister()) { + __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); + } else if (in.IsConstant()) { + double v = in.GetConstant()->AsDoubleConstant()->GetValue(); + XmmRegister dest = out.AsFpuRegister<XmmRegister>(); + if (bit_cast<int64_t, double>(v) == 0) { + __ xorps(dest, dest); + } else { + __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v))); + } + } else { + __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex())); + } break; default: @@ -1938,17 +2003,56 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver case Primitive::kPrimInt: case Primitive::kPrimChar: // Processing a Dex `int-to-double' instruction. - __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); + if (in.IsRegister()) { + __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); + } else if (in.IsConstant()) { + int32_t v = in.GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dest = out.AsFpuRegister<XmmRegister>(); + if (v == 0) { + __ xorpd(dest, dest); + } else { + __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v))); + } + } else { + __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex()), false); + } break; case Primitive::kPrimLong: // Processing a Dex `long-to-double' instruction. - __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); + if (in.IsRegister()) { + __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); + } else if (in.IsConstant()) { + int64_t v = in.GetConstant()->AsLongConstant()->GetValue(); + XmmRegister dest = out.AsFpuRegister<XmmRegister>(); + if (v == 0) { + __ xorpd(dest, dest); + } else { + __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v))); + } + } else { + __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex()), true); + } break; case Primitive::kPrimFloat: // Processing a Dex `float-to-double' instruction. - __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); + if (in.IsFpuRegister()) { + __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); + } else if (in.IsConstant()) { + float v = in.GetConstant()->AsFloatConstant()->GetValue(); + XmmRegister dest = out.AsFpuRegister<XmmRegister>(); + if (bit_cast<int32_t, float>(v) == 0) { + __ xorpd(dest, dest); + } else { + __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v))); + } + } else { + __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex())); + } break; default: @@ -3128,7 +3232,7 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { locations->SetInAt(1, Location::RequiresFpuRegister()); } else { - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1))); } if (needs_write_barrier) { // Temporary registers for the write barrier. 
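The Location::RegisterOrInt32LongConstant inputs introduced above, and the DCHECK(IsInt<32>(v)) guards in the stores that follow, reflect an x86-64 encoding limit: MOV r/m64, imm32 sign-extends its 32-bit immediate, so a long constant can be stored directly to memory only if it survives that round trip. A minimal sketch of the constraint, using a hypothetical helper name rather than ART's IsInt utility:

    #include <cstdint>

    // Returns true when `value` can be encoded as the sign-extended 32-bit
    // immediate of a 64-bit store (movq [mem], imm32).
    static bool FitsInSignExtendedImm32(int64_t value) {
      return value == static_cast<int64_t>(static_cast<int32_t>(value));
    }

    // For instance, 0x000000007FFFFFFF qualifies, while 0x0000000080000000 does
    // not: sign-extending its low 32 bits would yield 0xFFFFFFFF80000000.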
@@ -3155,24 +3259,46 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); + if (value.IsConstant()) { + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movb(Address(base, offset), Immediate(v)); + } else { + __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); + } break; } case Primitive::kPrimShort: case Primitive::kPrimChar: { - __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); + if (value.IsConstant()) { + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movw(Address(base, offset), Immediate(v)); + } else { + __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); + } break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - __ movl(Address(base, offset), value.AsRegister<CpuRegister>()); + if (value.IsConstant()) { + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movw(Address(base, offset), Immediate(v)); + } else { + __ movl(Address(base, offset), value.AsRegister<CpuRegister>()); + } break; } case Primitive::kPrimLong: { - __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); + if (value.IsConstant()) { + int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); + DCHECK(IsInt<32>(v)); + int32_t v_32 = v; + __ movq(Address(base, offset), Immediate(v_32)); + } else { + __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); + } break; } @@ -3291,8 +3417,7 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt( - 1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { @@ -3431,7 +3556,7 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { 1, Location::RegisterOrConstant(instruction->InputAt(1))); locations->SetInAt(2, Location::RequiresRegister()); if (value_type == Primitive::kPrimLong) { - locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2))); } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) { locations->SetInAt(2, Location::RequiresFpuRegister()); } else { @@ -3519,8 +3644,8 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movl(Address(obj, offset), value.AsRegister<CpuRegister>()); } else { DCHECK(value.IsConstant()) << value; - __ movl(Address(obj, offset), - Immediate(value.GetConstant()->AsIntConstant()->GetValue())); + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movl(Address(obj, offset), Immediate(v)); } } else { DCHECK(index.IsRegister()) << index; @@ -3529,8 +3654,9 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { value.AsRegister<CpuRegister>()); } else { DCHECK(value.IsConstant()) << value; + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), - Immediate(value.GetConstant()->AsIntConstant()->GetValue())); + 
Immediate(v)); } } codegen_->MaybeRecordImplicitNullCheck(instruction); @@ -3554,12 +3680,25 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - DCHECK(value.IsRegister()); - __ movq(Address(obj, offset), value.AsRegister<CpuRegister>()); + if (value.IsRegister()) { + __ movq(Address(obj, offset), value.AsRegister<CpuRegister>()); + } else { + int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); + DCHECK(IsInt<32>(v)); + int32_t v_32 = v; + __ movq(Address(obj, offset), Immediate(v_32)); + } } else { - DCHECK(value.IsRegister()); - __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), - value.AsRegister<CpuRegister>()); + if (value.IsRegister()) { + __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), + value.AsRegister<CpuRegister>()); + } else { + int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); + DCHECK(IsInt<32>(v)); + int32_t v_32 = v; + __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), + Immediate(v_32)); + } } codegen_->MaybeRecordImplicitNullCheck(instruction); break; @@ -3621,7 +3760,7 @@ void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (instruction->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } @@ -3633,16 +3772,38 @@ void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) Location length_loc = locations->InAt(1); SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction, index_loc, length_loc); - codegen_->AddSlowPath(slow_path); - CpuRegister length = length_loc.AsRegister<CpuRegister>(); - if (index_loc.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); - __ cmpl(length, Immediate(value)); + if (length_loc.IsConstant()) { + int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant()); + if (index_loc.IsConstant()) { + // BCE will remove the bounds check if we are guarenteed to pass. + int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + if (index < 0 || index >= length) { + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + } else { + // Some optimization after BCE may have generated this, and we should not + // generate a bounds check if it is a valid range. + } + return; + } + + // We have to reverse the jump condition because the length is the constant. 
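The VisitBoundsCheck code that follows relies on unsigned jump conditions, so a single comparison rejects both a negative index and an index that is too large: a negative index reinterpreted as unsigned is larger than any valid length. When the length is the constant the cmpl operands are swapped, which is why the taken condition flips from kBelowEqual (register length) to kAboveEqual (constant length). A small standalone C++ illustration of the same trick (the helper name is invented, not an ART API):

    #include <cstdint>
    #include <iostream>

    // One unsigned comparison implements "index < 0 || index >= length",
    // mirroring the kAboveEqual/kBelowEqual jumps emitted below.
    static bool OutOfBounds(int32_t index, int32_t length) {
      return static_cast<uint32_t>(index) >= static_cast<uint32_t>(length);
    }

    int main() {
      std::cout << OutOfBounds(3, 10) << "\n";   // 0: in range
      std::cout << OutOfBounds(10, 10) << "\n";  // 1: index == length
      std::cout << OutOfBounds(-1, 10) << "\n";  // 1: negative wraps to a huge unsigned value
      return 0;
    }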
+ CpuRegister index_reg = index_loc.AsRegister<CpuRegister>(); + __ cmpl(index_reg, Immediate(length)); + codegen_->AddSlowPath(slow_path); + __ j(kAboveEqual, slow_path->GetEntryLabel()); } else { - __ cmpl(length, index_loc.AsRegister<CpuRegister>()); + CpuRegister length = length_loc.AsRegister<CpuRegister>(); + if (index_loc.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + __ cmpl(length, Immediate(value)); + } else { + __ cmpl(length, index_loc.AsRegister<CpuRegister>()); + } + codegen_->AddSlowPath(slow_path); + __ j(kBelowEqual, slow_path->GetEntryLabel()); } - __ j(kBelowEqual, slow_path->GetEntryLabel()); } void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, @@ -4052,9 +4213,11 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { SlowPathCodeX86_64* slow_path = nullptr; // Return 0 if `obj` is null. - // TODO: avoid this check if we know obj is not null. - __ testl(obj, obj); - __ j(kEqual, &zero); + // Avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ testl(obj, obj); + __ j(kEqual, &zero); + } // Compare the class of `obj` with `cls`. __ movl(out, Address(obj, class_offset)); if (cls.IsRegister()) { @@ -4078,8 +4241,12 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ movl(out, Immediate(1)); __ jmp(&done); } - __ Bind(&zero); - __ movl(out, Immediate(0)); + + if (instruction->MustDoNullCheck() || instruction->IsClassFinal()) { + __ Bind(&zero); + __ movl(out, Immediate(0)); + } + if (slow_path != nullptr) { __ Bind(slow_path->GetExitLabel()); } @@ -4104,9 +4271,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); - // TODO: avoid this check if we know obj is not null. - __ testl(obj, obj); - __ j(kEqual, slow_path->GetExitLabel()); + // Avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ testl(obj, obj); + __ j(kEqual, slow_path->GetExitLabel()); + } // Compare the class of `obj` with `cls`. __ movl(temp, Address(obj, class_offset)); if (cls.IsRegister()) { @@ -4145,13 +4314,7 @@ void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instructio DCHECK(instruction->GetResultType() == Primitive::kPrimInt || instruction->GetResultType() == Primitive::kPrimLong); locations->SetInAt(0, Location::RequiresRegister()); - if (instruction->GetType() == Primitive::kPrimInt) { - locations->SetInAt(1, Location::Any()); - } else { - // We can handle 32 bit constants. 
- locations->SetInAt(1, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1))); - } + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); } @@ -4212,25 +4375,43 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in if (second.IsConstant()) { second_is_constant = true; value = second.GetConstant()->AsLongConstant()->GetValue(); - DCHECK(IsInt<32>(value)); } + bool is_int32_value = IsInt<32>(value); if (instruction->IsAnd()) { if (second_is_constant) { - __ andq(first_reg, Immediate(static_cast<int32_t>(value))); + if (is_int32_value) { + __ andq(first_reg, Immediate(static_cast<int32_t>(value))); + } else { + __ andq(first_reg, codegen_->LiteralInt64Address(value)); + } + } else if (second.IsDoubleStackSlot()) { + __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); } else { __ andq(first_reg, second.AsRegister<CpuRegister>()); } } else if (instruction->IsOr()) { if (second_is_constant) { - __ orq(first_reg, Immediate(static_cast<int32_t>(value))); + if (is_int32_value) { + __ orq(first_reg, Immediate(static_cast<int32_t>(value))); + } else { + __ orq(first_reg, codegen_->LiteralInt64Address(value)); + } + } else if (second.IsDoubleStackSlot()) { + __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); } else { __ orq(first_reg, second.AsRegister<CpuRegister>()); } } else { DCHECK(instruction->IsXor()); if (second_is_constant) { - __ xorq(first_reg, Immediate(static_cast<int32_t>(value))); + if (is_int32_value) { + __ xorq(first_reg, Immediate(static_cast<int32_t>(value))); + } else { + __ xorq(first_reg, codegen_->LiteralInt64Address(value)); + } + } else if (second.IsDoubleStackSlot()) { + __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); } else { __ xorq(first_reg, second.AsRegister<CpuRegister>()); } diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h index ac00824e33..66ff57855e 100644 --- a/compiler/optimizing/constant_folding.h +++ b/compiler/optimizing/constant_folding.h @@ -32,8 +32,8 @@ namespace art { */ class HConstantFolding : public HOptimization { public: - explicit HConstantFolding(HGraph* graph) - : HOptimization(graph, true, kConstantFoldingPassName) {} + explicit HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName) + : HOptimization(graph, true, name) {} void Run() OVERRIDE; diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 02ad675dc3..422223f5e0 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -572,14 +572,19 @@ TEST(ConstantFolding, IntConstantFoldingAndJumps) { }; // Expected difference after dead code elimination. 
- diff_t expected_dce_diff = { - { " 3: IntConstant\n", removed }, - { " 13: IntConstant\n", removed }, - { " 18: IntConstant\n", removed }, - { " 24: IntConstant\n", removed }, - { " 34: IntConstant\n", removed }, - }; - std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); + std::string expected_after_dce = + "BasicBlock 0, succ: 1\n" + " 5: IntConstant []\n" + " 30: SuspendCheck\n" + " 32: IntConstant []\n" + " 33: IntConstant []\n" + " 35: IntConstant [28]\n" + " 31: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 5\n" + " 21: SuspendCheck\n" + " 28: Return(35)\n" + "BasicBlock 5, pred: 1\n" + " 29: Exit\n"; TestCode(data, expected_before, @@ -647,13 +652,15 @@ TEST(ConstantFolding, ConstantCondition) { ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1); }; - // Expected difference after dead code elimination. - diff_t expected_dce_diff = { - { " 3: IntConstant [9, 15, 22]\n", " 3: IntConstant [9, 22]\n" }, - { " 22: Phi(3, 5) [15]\n", " 22: Phi(3, 5)\n" }, - { " 15: Add(22, 3)\n", removed } - }; - std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); + // Expected graph after dead code elimination. + std::string expected_after_dce = + "BasicBlock 0, succ: 1\n" + " 19: SuspendCheck\n" + " 20: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 4\n" + " 17: ReturnVoid\n" + "BasicBlock 4, pred: 1\n" + " 18: Exit\n"; TestCode(data, expected_before, diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 94990402e5..91cd60acce 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -20,10 +20,78 @@ namespace art { -void HDeadCodeElimination::Run() { +static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) { + int block_id = block->GetBlockId(); + if (visited->IsBitSet(block_id)) { + return; + } + visited->SetBit(block_id); + + HInstruction* last_instruction = block->GetLastInstruction(); + if (last_instruction->IsIf()) { + HIf* if_instruction = last_instruction->AsIf(); + HInstruction* condition = if_instruction->InputAt(0); + if (!condition->IsIntConstant()) { + MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited); + MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited); + } else if (condition->AsIntConstant()->IsOne()) { + MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited); + } else { + DCHECK(condition->AsIntConstant()->IsZero()); + MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited); + } + } else { + for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) { + MarkReachableBlocks(block->GetSuccessors().Get(i), visited); + } + } +} + +void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) { + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction, + block->GetPhis().CountSize() + block->GetInstructions().CountSize()); + } +} + +void HDeadCodeElimination::RemoveDeadBlocks() { + // Classify blocks as reachable/unreachable. + ArenaAllocator* allocator = graph_->GetArena(); + ArenaBitVector live_blocks(allocator, graph_->GetBlocks().Size(), false); + MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks); + + // Remove all dead blocks. Process blocks in post-order, because removal needs + // the block's chain of dominators. 
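MarkReachableBlocks above follows only the successor selected by a constant HIf condition, so blocks behind an always-false (or always-true) branch are never marked and the removal loop below can delete them. A minimal standalone sketch of the same reachability walk over a toy CFG (the Block struct and its true/false successor convention are invented for illustration, not ART types):

    #include <vector>

    struct Block {
      std::vector<int> successors;  // successor ids; for an "if": {true_target, false_target}
      int constant_condition;       // -1: not a constant if, 1: always true, 0: always false
    };

    static void MarkReachable(const std::vector<Block>& cfg, int id, std::vector<bool>* visited) {
      if ((*visited)[id]) {
        return;
      }
      (*visited)[id] = true;
      const Block& block = cfg[id];
      if (block.constant_condition == -1) {
        for (int succ : block.successors) {
          MarkReachable(cfg, succ, visited);  // follow every successor
        }
      } else {
        // Only the branch selected by the constant condition stays reachable.
        MarkReachable(cfg, block.successors[block.constant_condition == 1 ? 0 : 1], visited);
      }
    }

    int main() {
      // Block 0 ends with "if (false)": true target 1, false target 2; both reach 3.
      std::vector<Block> cfg = {
        { {1, 2}, 0 },   // 0: constant-false if
        { {3}, -1 },     // 1: dead true branch
        { {3}, -1 },     // 2: live false branch
        { {}, -1 },      // 3: exit
      };
      std::vector<bool> visited(cfg.size(), false);
      MarkReachable(cfg, 0, &visited);
      // visited is {true, false, true, true}: the dead branch (block 1) was never marked.
      return visited[1] ? 1 : 0;
    }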
+ for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + if (live_blocks.IsBitSet(block->GetBlockId())) { + continue; + } + MaybeRecordDeadBlock(block); + block->DisconnectAndDelete(); + } + + // Connect successive blocks created by dead branches. Order does not matter. + for (HReversePostOrderIterator it(*graph_); !it.Done();) { + HBasicBlock* block = it.Current(); + if (block->IsEntryBlock() || block->GetSuccessors().Size() != 1u) { + it.Advance(); + continue; + } + HBasicBlock* successor = block->GetSuccessors().Get(0); + if (successor->IsExitBlock() || successor->GetPredecessors().Size() != 1u) { + it.Advance(); + continue; + } + block->MergeWith(successor); + + // Reiterate on this block in case it can be merged with its new successor. + } +} + +void HDeadCodeElimination::RemoveDeadInstructions() { // Process basic blocks in post-order in the dominator tree, so that - // a dead instruction depending on another dead instruction is - // removed. + // a dead instruction depending on another dead instruction is removed. for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) { HBasicBlock* block = b.Current(); // Traverse this block's instructions in backward order and remove @@ -41,9 +109,15 @@ void HDeadCodeElimination::Run() { && !inst->IsMemoryBarrier() // If we added an explicit barrier then we should keep it. && !inst->HasUses()) { block->RemoveInstruction(inst); + MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction); } } } } +void HDeadCodeElimination::Run() { + RemoveDeadBlocks(); + RemoveDeadInstructions(); +} + } // namespace art diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 3db2c3ff3f..0bea0fc1c2 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -19,6 +19,7 @@ #include "nodes.h" #include "optimization.h" +#include "optimizing_compiler_stats.h" namespace art { @@ -28,8 +29,10 @@ namespace art { */ class HDeadCodeElimination : public HOptimization { public: - explicit HDeadCodeElimination(HGraph* graph) - : HOptimization(graph, true, kDeadCodeEliminationPassName) {} + HDeadCodeElimination(HGraph* graph, + OptimizingCompilerStats* stats = nullptr, + const char* name = kDeadCodeEliminationPassName) + : HOptimization(graph, true, name, stats) {} void Run() OVERRIDE; @@ -37,6 +40,10 @@ class HDeadCodeElimination : public HOptimization { "dead_code_elimination"; private: + void MaybeRecordDeadBlock(HBasicBlock* block); + void RemoveDeadBlocks(); + void RemoveDeadInstructions(); + DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination); }; diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index 98ae1ec5d3..3209d3eb18 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -169,20 +169,25 @@ TEST(DeadCodeElimination, AdditionsAndInconditionalJumps) { "BasicBlock 5, pred: 4\n" " 28: Exit\n"; - // Expected difference after dead code elimination. - diff_t expected_diff = { - { " 13: IntConstant [14]\n", removed }, - { " 24: IntConstant [25]\n", removed }, - { " 14: Add(19, 13) [25]\n", removed }, - // The SuspendCheck instruction following this Add instruction - // inserts the latter in an environment, thus making it "used" and - // therefore non removable. 
It ensues that some other Add and - // IntConstant instructions cannot be removed, as they are direct - // or indirect inputs of the initial Add instruction. - { " 19: Add(9, 18) [14]\n", " 19: Add(9, 18) []\n" }, - { " 25: Add(14, 24)\n", removed }, - }; - std::string expected_after = Patch(expected_before, expected_diff); + // The SuspendCheck instruction following this Add instruction + // inserts the latter in an environment, thus making it "used" and + // therefore non removable. It ensures that some other Add and + // IntConstant instructions cannot be removed, as they are direct + // or indirect inputs of the initial Add instruction. + std::string expected_after = + "BasicBlock 0, succ: 1\n" + " 3: IntConstant [9]\n" + " 5: IntConstant [9]\n" + " 18: IntConstant [19]\n" + " 29: SuspendCheck\n" + " 30: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 5\n" + " 9: Add(3, 5) [19]\n" + " 19: Add(9, 18) []\n" + " 21: SuspendCheck\n" + " 27: ReturnVoid\n" + "BasicBlock 5, pred: 1\n" + " 28: Exit\n"; TestCode(data, expected_before, expected_after); } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 2216cecc2b..dc3124b35f 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -88,24 +88,49 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { // Visit this block's list of phis. for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* current = it.Current(); // Ensure this block's list of phis contains only phis. - if (!it.Current()->IsPhi()) { + if (!current->IsPhi()) { AddError(StringPrintf("Block %d has a non-phi in its phi list.", current_block_->GetBlockId())); } - it.Current()->Accept(this); + if (current->GetNext() == nullptr && current != block->GetLastPhi()) { + AddError(StringPrintf("The recorded last phi of block %d does not match " + "the actual last phi %d.", + current_block_->GetBlockId(), + current->GetId())); + } + current->Accept(this); } // Visit this block's list of instructions. - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); - it.Advance()) { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* current = it.Current(); // Ensure this block's list of instructions does not contains phis. - if (it.Current()->IsPhi()) { + if (current->IsPhi()) { AddError(StringPrintf("Block %d has a phi in its non-phi list.", current_block_->GetBlockId())); } - it.Current()->Accept(this); + if (current->GetNext() == nullptr && current != block->GetLastInstruction()) { + AddError(StringPrintf("The recorded last instruction of block %d does not match " + "the actual last instruction %d.", + current_block_->GetBlockId(), + current->GetId())); + } + current->Accept(this); + } +} + +void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) { + if (!GetGraph()->HasBoundsChecks()) { + AddError(StringPrintf("Instruction %s:%d is a HBoundsCheck, " + "but HasBoundsChecks() returns false", + check->DebugName(), + check->GetId())); } + + // Perform the instruction base checks too. 
+ VisitInstruction(check); } void GraphChecker::VisitInstruction(HInstruction* instruction) { @@ -178,6 +203,30 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } } +void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + VisitInstruction(invoke); + + if (invoke->IsStaticWithExplicitClinitCheck()) { + size_t last_input_index = invoke->InputCount() - 1; + HInstruction* last_input = invoke->InputAt(last_input_index); + if (last_input == nullptr) { + AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check " + "has a null pointer as last input.", + invoke->DebugName(), + invoke->GetId())); + } + if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) { + AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check " + "has a last instruction (%s:%d) which is neither a clinit check " + "nor a load class instruction.", + invoke->DebugName(), + invoke->GetId(), + last_input->DebugName(), + last_input->GetId())); + } + } +} + void SSAChecker::VisitBasicBlock(HBasicBlock* block) { super_type::VisitBasicBlock(block); @@ -251,6 +300,8 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { } } + const ArenaBitVector& loop_blocks = loop_header->GetLoopInformation()->GetBlocks(); + // Ensure there is only one back edge per loop. size_t num_back_edges = loop_header->GetLoopInformation()->GetBackEdges().Size(); @@ -263,19 +314,41 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { "Loop defined by header %d has several back edges: %zu.", id, num_back_edges)); + } else { + DCHECK_EQ(num_back_edges, 1u); + int back_edge_id = loop_header->GetLoopInformation()->GetBackEdges().Get(0)->GetBlockId(); + if (!loop_blocks.IsBitSet(back_edge_id)) { + AddError(StringPrintf( + "Loop defined by header %d has an invalid back edge %d.", + id, + back_edge_id)); + } } - // Ensure all blocks in the loop are dominated by the loop header. - const ArenaBitVector& loop_blocks = - loop_header->GetLoopInformation()->GetBlocks(); + // Ensure all blocks in the loop are live and dominated by the loop header. for (uint32_t i : loop_blocks.Indexes()) { HBasicBlock* loop_block = GetGraph()->GetBlocks().Get(i); - if (!loop_header->Dominates(loop_block)) { + if (loop_block == nullptr) { + AddError(StringPrintf("Loop defined by header %d contains a previously removed block %d.", + id, + i)); + } else if (!loop_header->Dominates(loop_block)) { AddError(StringPrintf("Loop block %d not dominated by loop header %d.", - loop_block->GetBlockId(), + i, id)); } } + + // If this is a nested loop, ensure the outer loops contain a superset of the blocks. 
+ for (HLoopInformationOutwardIterator it(*loop_header); !it.Done(); it.Advance()) { + HLoopInformation* outer_info = it.Current(); + if (!loop_blocks.IsSubsetOf(&outer_info->GetBlocks())) { + AddError(StringPrintf("Blocks of loop defined by header %d are not a subset of blocks of " + "an outer loop defined by header %d.", + id, + outer_info->GetHeader()->GetBlockId())); + } + } } void SSAChecker::VisitInstruction(HInstruction* instruction) { @@ -459,7 +532,7 @@ void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) { Primitive::PrettyDescriptor(op->InputAt(1)->GetType()))); } } else { - if (PrimitiveKind(op->InputAt(1)->GetType()) != PrimitiveKind(op->InputAt(0)->GetType())) { + if (PrimitiveKind(op->InputAt(0)->GetType()) != PrimitiveKind(op->InputAt(1)->GetType())) { AddError(StringPrintf( "Binary operation %s %d has inputs of different types: " "%s, and %s.", @@ -484,7 +557,7 @@ void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) { "from its input type: %s vs %s.", op->DebugName(), op->GetId(), Primitive::PrettyDescriptor(op->GetType()), - Primitive::PrettyDescriptor(op->InputAt(1)->GetType()))); + Primitive::PrettyDescriptor(op->InputAt(0)->GetType()))); } } } diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 24fee373f9..b4314da03b 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -42,6 +42,12 @@ class GraphChecker : public HGraphDelegateVisitor { // Check `instruction`. void VisitInstruction(HInstruction* instruction) OVERRIDE; + // Perform control-flow graph checks on instruction. + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; + + // Check that the HasBoundsChecks() flag is set for bounds checks. + void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; + // Was the last visit of the graph valid? bool IsValid() const { return errors_.empty(); diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 74848d5d96..708733e28c 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -55,7 +55,7 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { buckets_owned_(allocator, num_buckets_, false), num_entries_(to_copy.num_entries_) { // ArenaAllocator returns zeroed memory, so entries of buckets_ and - // buckets_owned_ are initialized to nullptr and false, respectively. + // buckets_owned_ are initialized to null and false, respectively. DCHECK(IsPowerOfTwo(num_buckets_)); if (num_buckets_ == to_copy.num_buckets_) { // Hash table remains the same size. We copy the bucket pointers and leave diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 6d2a8d77e2..ada32db047 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -130,6 +130,16 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, return false; } + if (invoke_instruction->IsInvokeStaticOrDirect() && + invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) { + // Case of a static method that cannot be inlined because it implicitly + // requires an initialization check of its declaring class. 
+ VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + << " is not inlined because it is static and requires a clinit" + << " check that cannot be emitted due to Dex cache limitations"; + return false; + } + if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, can_use_dex_cache)) { resolved_method->SetShouldNotInline(); return false; @@ -190,7 +200,7 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, } // Run simple optimizations on the graph. - HDeadCodeElimination dce(callee_graph); + HDeadCodeElimination dce(callee_graph, stats_); HConstantFolding fold(callee_graph); InstructionSimplifier simplify(callee_graph, stats_); @@ -258,8 +268,8 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, callee_graph->InlineInto(graph_, invoke_instruction); - if (callee_graph->HasArrayAccesses()) { - graph_->SetHasArrayAccesses(true); + if (callee_graph->HasBoundsChecks()) { + graph_->SetHasBoundsChecks(true); } return true; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index b8ae1f6369..2df7c166d8 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -62,6 +62,7 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitSub(HSub* instruction) OVERRIDE; void VisitUShr(HUShr* instruction) OVERRIDE; void VisitXor(HXor* instruction) OVERRIDE; + void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; @@ -89,10 +90,6 @@ void InstructionSimplifierVisitor::Run() { // current index, so don't advance the iterator. continue; } - if (simplifications_at_current_position_ >= kMaxSamePositionSimplifications) { - LOG(WARNING) << "Too many simplifications (" << simplifications_at_current_position_ - << ") occurred at the current position."; - } simplifications_at_current_position_ = 0; it.Advance(); } @@ -163,6 +160,10 @@ void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); + if (!check_cast->InputAt(0)->CanBeNull()) { + check_cast->ClearMustDoNullCheck(); + } + if (!load_class->IsResolved()) { // If the class couldn't be resolve it's not safe to compare against it. It's // default type would be Top which might be wider that the actual class type @@ -180,6 +181,12 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { } } +void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { + if (!instruction->InputAt(0)->CanBeNull()) { + instruction->ClearMustDoNullCheck(); + } +} + void InstructionSimplifierVisitor::VisitSuspendCheck(HSuspendCheck* check) { HBasicBlock* block = check->GetBlock(); // Currently always keep the suspend check at entry. @@ -431,9 +438,16 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { if (Primitive::IsIntOrLongType(type)) { int64_t factor = Int64FromConstant(input_cst); - // We expect the `0` case to have been handled in the constant folding pass. - DCHECK_NE(factor, 0); - if (IsPowerOfTwo(factor)) { + // Even though constant propagation also takes care of the zero case, other + // optimizations can lead to having a zero multiplication. 
+ if (factor == 0) { + // Replace code looking like + // MUL dst, src, 0 + // with + // 0 + instruction->ReplaceWith(input_cst); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (IsPowerOfTwo(factor)) { // Replace code looking like // MUL dst, src, pow_of_2 // with @@ -467,7 +481,8 @@ void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) { return; } - if (input->IsSub() && input->HasOnlyOneNonEnvironmentUse()) { + if (input->IsSub() && input->HasOnlyOneNonEnvironmentUse() && + !Primitive::IsFloatingPointType(input->GetType())) { // Replace code looking like // SUB tmp, a, b // NEG dst, tmp @@ -478,6 +493,7 @@ void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) { // worse code. In particular, we do not want the live ranges of `a` and `b` // to be extended if we are not sure the initial 'SUB' instruction can be // removed. + // We do not perform optimization for fp because we could lose the sign of zero. HSub* sub = input->AsSub(); HSub* new_sub = new (GetGraph()->GetArena()) HSub(instruction->GetType(), sub->GetRight(), sub->GetLeft()); diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 9a6062fedf..abdf04ebb1 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -79,6 +79,7 @@ static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGener static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) { if (invoke->InputCount() == 0) { + // No argument to move. return; } @@ -863,7 +864,7 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); // Note that the null check must have been done earlier. - DCHECK(!invoke->CanDoImplicitNullCheck()); + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); Register argument = locations->InAt(1).AsRegister<Register>(); __ cmp(argument, ShifterOperand(0)); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index d3a4e6ca15..7a753b2da9 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -88,6 +88,7 @@ static void MoveFromReturnRegister(Location trg, static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM64* codegen) { if (invoke->InputCount() == 0) { + // No argument to move. return; } @@ -1007,7 +1008,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); // Note that the null check must have been done earlier. - DCHECK(!invoke->CanDoImplicitNullCheck()); + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); Register argument = WRegisterFrom(locations->InAt(1)); __ Cmp(argument, 0); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 3c7a2660db..7275edb695 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -113,6 +113,7 @@ static void MoveFromReturnRegister(Location target, static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) { if (invoke->InputCount() == 0) { + // No argument to move. 
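The VisitNeg change in instruction_simplifier.cc above restricts the NEG(SUB(a, b)) -> SUB(b, a) rewrite to non-floating-point types because of the sign of zero: under IEEE-754 arithmetic a - b is +0.0 when a == b, so negating it yields -0.0 while b - a yields +0.0. A standalone check of that corner case:

    #include <cmath>
    #include <iostream>

    int main() {
      double a = 1.5, b = 1.5;
      double neg_sub = -(a - b);  // -(+0.0) == -0.0
      double swapped = b - a;     // +0.0
      // The two results compare equal but differ in the sign bit, so the
      // rewrite would not preserve the sign of zero for fp types.
      std::cout << (neg_sub == swapped) << "\n";   // 1
      std::cout << std::signbit(neg_sub) << "\n";  // 1
      std::cout << std::signbit(swapped) << "\n";  // 0
      return 0;
    }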
return; } @@ -828,7 +829,7 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); return; @@ -962,7 +963,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); // Note that the null check must have been done earlier. - DCHECK(!invoke->CanDoImplicitNullCheck()); + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); Register argument = locations->InAt(1).AsRegister<Register>(); __ testl(argument, argument); @@ -1038,7 +1039,7 @@ static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); - HInstruction *value = invoke->InputAt(1); + HInstruction* value = invoke->InputAt(1); if (size == Primitive::kPrimByte) { locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value)); } else { diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index d9a1c31c77..35daaf60bb 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -105,6 +105,7 @@ static void MoveFromReturnRegister(Location trg, static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) { if (invoke->InputCount() == 0) { + // No argument to move. return; } @@ -704,7 +705,6 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); return; } @@ -732,14 +732,12 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int. XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; X86_64Assembler* assembler = GetAssembler(); - // Generate 0.5 into inPlusPointFive. - __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f))); - __ movd(inPlusPointFive, out, false); + // Load 0.5 into inPlusPointFive. + __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f)); // Add in the input. __ addss(inPlusPointFive, in); @@ -747,12 +745,8 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { // And truncate to an integer. 
__ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); - __ movl(out, Immediate(kPrimIntMax)); - // maxInt = int-to-float(out) - __ cvtsi2ss(maxInt, out); - // if inPlusPointFive >= maxInt goto done - __ comiss(inPlusPointFive, maxInt); + __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax))); __ j(kAboveEqual, &done); // if input == NaN goto nan @@ -782,14 +776,12 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { // Implement RoundDouble as t1 = floor(input + 0.5); convert to long. XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; X86_64Assembler* assembler = GetAssembler(); - // Generate 0.5 into inPlusPointFive. - __ movq(out, Immediate(bit_cast<int64_t, double>(0.5))); - __ movd(inPlusPointFive, out, true); + // Load 0.5 into inPlusPointFive. + __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5)); // Add in the input. __ addsd(inPlusPointFive, in); @@ -797,12 +789,8 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { // And truncate to an integer. __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1)); - __ movq(out, Immediate(kPrimLongMax)); - // maxLong = long-to-double(out) - __ cvtsi2sd(maxLong, out, true); - // if inPlusPointFive >= maxLong goto done - __ comisd(inPlusPointFive, maxLong); + __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax))); __ j(kAboveEqual, &done); // if input == NaN goto nan @@ -886,7 +874,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); // Note that the null check must have been done earlier. - DCHECK(!invoke->CanDoImplicitNullCheck()); + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>(); __ testl(argument, argument); @@ -960,26 +948,48 @@ static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1))); } static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); - CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); + Location value = locations->InAt(1); // x86 allows unaligned access. We do not have to check the input or use specific instructions // to avoid a SIGBUS. 
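Several hunks in this patch (HandleFieldSet, VisitArraySet, and the GenPoke switch just below) only emit movq with an Immediate after DCHECK(IsInt<32>(v)): x86-64 stores to memory take at most a 32-bit immediate that the CPU sign-extends to 64 bits, so larger long constants have to go through a register. A standalone sketch of that representability check (FitsInImm32 is an invented name, not an ART helper):

    #include <cstdint>
    #include <iostream>
    #include <limits>

    // True if the value survives a sign-extending 32-bit immediate,
    // i.e. the property IsInt<32>(v) asserts before a movq with an Immediate.
    static bool FitsInImm32(int64_t v) {
      return v >= std::numeric_limits<int32_t>::min() &&
             v <= std::numeric_limits<int32_t>::max();
    }

    int main() {
      std::cout << FitsInImm32(-1) << "\n";           // 1: 0xFFFFFFFF sign-extends back to -1
      std::cout << FitsInImm32(0x80000000LL) << "\n"; // 0: 2^31 needs the register path
      return 0;
    }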
switch (size) { case Primitive::kPrimByte: - __ movb(Address(address, 0), value); + if (value.IsConstant()) { + __ movb(Address(address, 0), + Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); + } else { + __ movb(Address(address, 0), value.AsRegister<CpuRegister>()); + } break; case Primitive::kPrimShort: - __ movw(Address(address, 0), value); + if (value.IsConstant()) { + __ movw(Address(address, 0), + Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); + } else { + __ movw(Address(address, 0), value.AsRegister<CpuRegister>()); + } break; case Primitive::kPrimInt: - __ movl(Address(address, 0), value); + if (value.IsConstant()) { + __ movl(Address(address, 0), + Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); + } else { + __ movl(Address(address, 0), value.AsRegister<CpuRegister>()); + } break; case Primitive::kPrimLong: - __ movq(Address(address, 0), value); + if (value.IsConstant()) { + int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); + DCHECK(IsInt<32>(v)); + int32_t v_32 = v; + __ movq(Address(address, 0), Immediate(v_32)); + } else { + __ movq(Address(address, 0), value.AsRegister<CpuRegister>()); + } break; default: LOG(FATAL) << "Type not recognized for poke: " << size; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 5fca4fab22..c158ddf4ee 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -416,26 +416,6 @@ static void UpdateInputsUsers(HInstruction* instruction) { DCHECK(!instruction->HasEnvironment()); } -void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) { - DCHECK(!cursor->IsPhi()); - DCHECK(!instruction->IsPhi()); - DCHECK_EQ(instruction->GetId(), -1); - DCHECK_NE(cursor->GetId(), -1); - DCHECK_EQ(cursor->GetBlock(), this); - DCHECK(!instruction->IsControlFlow()); - instruction->next_ = cursor; - instruction->previous_ = cursor->previous_; - cursor->previous_ = instruction; - if (GetFirstInstruction() == cursor) { - instructions_.first_instruction_ = instruction; - } else { - instruction->previous_->next_ = instruction; - } - instruction->SetBlock(this); - instruction->SetId(GetGraph()->GetNextInstructionId()); - UpdateInputsUsers(instruction); -} - void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement) { DCHECK(initial->GetBlock() == this); @@ -463,23 +443,27 @@ void HBasicBlock::AddPhi(HPhi* phi) { Add(&phis_, this, phi); } +void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) { + DCHECK(!cursor->IsPhi()); + DCHECK(!instruction->IsPhi()); + DCHECK_EQ(instruction->GetId(), -1); + DCHECK_NE(cursor->GetId(), -1); + DCHECK_EQ(cursor->GetBlock(), this); + DCHECK(!instruction->IsControlFlow()); + instruction->SetBlock(this); + instruction->SetId(GetGraph()->GetNextInstructionId()); + UpdateInputsUsers(instruction); + instructions_.InsertInstructionBefore(instruction, cursor); +} + void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) { DCHECK_EQ(phi->GetId(), -1); DCHECK_NE(cursor->GetId(), -1); DCHECK_EQ(cursor->GetBlock(), this); - if (cursor->next_ == nullptr) { - cursor->next_ = phi; - phi->previous_ = cursor; - DCHECK(phi->next_ == nullptr); - } else { - phi->next_ = cursor->next_; - phi->previous_ = cursor; - cursor->next_ = phi; - phi->next_->previous_ = phi; - } phi->SetBlock(this); phi->SetId(GetGraph()->GetNextInstructionId()); UpdateInputsUsers(phi); + phis_.InsertInstructionAfter(phi, cursor); } static void 
Remove(HInstructionList* instruction_list, @@ -546,6 +530,34 @@ void HInstructionList::AddInstruction(HInstruction* instruction) { } } +void HInstructionList::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) { + DCHECK(Contains(cursor)); + if (cursor == first_instruction_) { + cursor->previous_ = instruction; + instruction->next_ = cursor; + first_instruction_ = instruction; + } else { + instruction->previous_ = cursor->previous_; + instruction->next_ = cursor; + cursor->previous_ = instruction; + instruction->previous_->next_ = instruction; + } +} + +void HInstructionList::InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor) { + DCHECK(Contains(cursor)); + if (cursor == last_instruction_) { + cursor->next_ = instruction; + instruction->previous_ = cursor; + last_instruction_ = instruction; + } else { + instruction->next_ = cursor->next_; + instruction->previous_ = cursor; + cursor->next_ = instruction; + instruction->next_->previous_ = instruction; + } +} + void HInstructionList::RemoveInstruction(HInstruction* instruction) { if (instruction->previous_ != nullptr) { instruction->previous_->next_ = instruction->next_; @@ -660,6 +672,11 @@ void HPhi::AddInput(HInstruction* input) { input->AddUseAt(this, inputs_.Size() - 1); } +void HPhi::RemoveInputAt(size_t index) { + RemoveAsUserOfInput(index); + inputs_.DeleteAt(index); +} + #define DEFINE_ACCEPT(name, super) \ void H##name::Accept(HGraphVisitor* visitor) { \ visitor->Visit##name(this); \ @@ -702,7 +719,7 @@ HConstant* HUnaryOperation::TryStaticEvaluation() const { // TODO: Implement static evaluation of long unary operations. // // Do not exit with a fatal condition here. Instead, simply - // return `nullptr' to notify the caller that this instruction + // return `null' to notify the caller that this instruction // cannot (yet) be statically evaluated. return nullptr; } @@ -738,7 +755,7 @@ HConstant* HBinaryOperation::GetConstantRight() const { } // If `GetConstantRight()` returns one of the input, this returns the other -// one. Otherwise it returns nullptr. +// one. Otherwise it returns null. HInstruction* HBinaryOperation::GetLeastConstantLeft() const { HInstruction* most_constant_right = GetConstantRight(); if (most_constant_right == nullptr) { @@ -855,6 +872,15 @@ bool HBasicBlock::HasSinglePhi() const { return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr; } +size_t HInstructionList::CountSize() const { + size_t size = 0; + HInstruction* current = first_instruction_; + for (; current != nullptr; current = current->GetNext()) { + size++; + } + return size; +} + void HInstructionList::SetBlockOfInstructions(HBasicBlock* block) const { for (HInstruction* current = first_instruction_; current != nullptr; @@ -886,40 +912,167 @@ void HInstructionList::Add(const HInstructionList& instruction_list) { } } -void HBasicBlock::DisconnectFromAll() { - DCHECK(dominated_blocks_.IsEmpty()) << "Unimplemented scenario"; +void HBasicBlock::DisconnectAndDelete() { + // Dominators must be removed after all the blocks they dominate. This way + // a loop header is removed last, a requirement for correct loop information + // iteration. + DCHECK(dominated_blocks_.IsEmpty()); + // Remove the block from all loops it is included in. 
+ for (HLoopInformationOutwardIterator it(*this); !it.Done(); it.Advance()) { + HLoopInformation* loop_info = it.Current(); + loop_info->Remove(this); + if (loop_info->IsBackEdge(*this)) { + // This deliberately leaves the loop in an inconsistent state and will + // fail SSAChecker unless the entire loop is removed during the pass. + loop_info->RemoveBackEdge(this); + } + } + + // Disconnect the block from its predecessors and update their control-flow + // instructions. for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) { - predecessors_.Get(i)->successors_.Delete(this); + HBasicBlock* predecessor = predecessors_.Get(i); + HInstruction* last_instruction = predecessor->GetLastInstruction(); + predecessor->RemoveInstruction(last_instruction); + predecessor->RemoveSuccessor(this); + if (predecessor->GetSuccessors().Size() == 1u) { + DCHECK(last_instruction->IsIf()); + predecessor->AddInstruction(new (graph_->GetArena()) HGoto()); + } else { + // The predecessor has no remaining successors and therefore must be dead. + // We deliberately leave it without a control-flow instruction so that the + // SSAChecker fails unless it is not removed during the pass too. + DCHECK_EQ(predecessor->GetSuccessors().Size(), 0u); + } } + predecessors_.Reset(); + + // Disconnect the block from its successors and update their dominators + // and phis. for (size_t i = 0, e = successors_.Size(); i < e; ++i) { - successors_.Get(i)->predecessors_.Delete(this); - } - dominator_->dominated_blocks_.Delete(this); + HBasicBlock* successor = successors_.Get(i); + // Delete this block from the list of predecessors. + size_t this_index = successor->GetPredecessorIndexOf(this); + successor->predecessors_.DeleteAt(this_index); + + // Check that `successor` has other predecessors, otherwise `this` is the + // dominator of `successor` which violates the order DCHECKed at the top. + DCHECK(!successor->predecessors_.IsEmpty()); + + // Recompute the successor's dominator. + HBasicBlock* old_dominator = successor->GetDominator(); + HBasicBlock* new_dominator = successor->predecessors_.Get(0); + for (size_t j = 1, f = successor->predecessors_.Size(); j < f; ++j) { + new_dominator = graph_->FindCommonDominator( + new_dominator, successor->predecessors_.Get(j)); + } + if (old_dominator != new_dominator) { + successor->SetDominator(new_dominator); + old_dominator->RemoveDominatedBlock(successor); + new_dominator->AddDominatedBlock(successor); + } - predecessors_.Reset(); + // Remove this block's entries in the successor's phis. + if (successor->predecessors_.Size() == 1u) { + // The successor has just one predecessor left. Replace phis with the only + // remaining input. + for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + HPhi* phi = phi_it.Current()->AsPhi(); + phi->ReplaceWith(phi->InputAt(1 - this_index)); + successor->RemovePhi(phi); + } + } else { + for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + phi_it.Current()->AsPhi()->RemoveInputAt(this_index); + } + } + } successors_.Reset(); - dominator_ = nullptr; - graph_ = nullptr; + + // Disconnect from the dominator. + dominator_->RemoveDominatedBlock(this); + SetDominator(nullptr); + + // Delete from the graph. The function safely deletes remaining instructions + // and updates the reverse post order. 
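When DisconnectAndDelete above removes a predecessor edge, each remaining successor's immediate dominator is recomputed by folding FindCommonDominator over its remaining predecessors. One common way to intersect two dominator chains, shown here as a generic standalone sketch (the Block type is invented and this is not the ART implementation), is to mark the ancestors of one block and then climb from the other until a marked block is found:

    #include <unordered_set>

    struct Block {
      Block* dominator = nullptr;  // immediate dominator; null for the entry block
    };

    // Mark the dominator chain of `a`, then walk up from `b` to the first marked block.
    static Block* CommonDominator(Block* a, Block* b) {
      std::unordered_set<Block*> ancestors;
      for (Block* cur = a; cur != nullptr; cur = cur->dominator) {
        ancestors.insert(cur);
      }
      for (Block* cur = b; cur != nullptr; cur = cur->dominator) {
        if (ancestors.count(cur) != 0) {
          return cur;
        }
      }
      return nullptr;  // no common ancestor (should not happen in a valid CFG)
    }

    int main() {
      // Diamond: entry dominates left and right, which dominate the two merge inputs.
      Block entry, left, right, merge_l, merge_r;
      left.dominator = &entry;
      right.dominator = &entry;
      merge_l.dominator = &left;
      merge_r.dominator = &right;
      return CommonDominator(&merge_l, &merge_r) == &entry ? 0 : 1;
    }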
+ graph_->DeleteDeadBlock(this); + SetGraph(nullptr); } void HBasicBlock::MergeWith(HBasicBlock* other) { - DCHECK(successors_.IsEmpty()) << "Unimplemented block merge scenario"; - DCHECK(dominated_blocks_.IsEmpty() - || (dominated_blocks_.Size() == 1 && dominated_blocks_.Get(0) == other)) - << "Unimplemented block merge scenario"; + DCHECK_EQ(GetGraph(), other->GetGraph()); + DCHECK(GetDominatedBlocks().Contains(other)); + DCHECK_EQ(GetSuccessors().Size(), 1u); + DCHECK_EQ(GetSuccessors().Get(0), other); + DCHECK_EQ(other->GetPredecessors().Size(), 1u); + DCHECK_EQ(other->GetPredecessors().Get(0), this); DCHECK(other->GetPhis().IsEmpty()); + // Move instructions from `other` to `this`. + DCHECK(EndsWithControlFlowInstruction()); + RemoveInstruction(GetLastInstruction()); + instructions_.Add(other->GetInstructions()); + other->instructions_.SetBlockOfInstructions(this); + other->instructions_.Clear(); + + // Remove `other` from the loops it is included in. + for (HLoopInformationOutwardIterator it(*other); !it.Done(); it.Advance()) { + HLoopInformation* loop_info = it.Current(); + loop_info->Remove(other); + if (loop_info->IsBackEdge(*other)) { + loop_info->ClearBackEdges(); + loop_info->AddBackEdge(this); + } + } + + // Update links to the successors of `other`. successors_.Reset(); - dominated_blocks_.Reset(); + while (!other->successors_.IsEmpty()) { + HBasicBlock* successor = other->successors_.Get(0); + successor->ReplacePredecessor(other, this); + } + + // Update the dominator tree. + dominated_blocks_.Delete(other); + for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) { + HBasicBlock* dominated = other->GetDominatedBlocks().Get(i); + dominated_blocks_.Add(dominated); + dominated->SetDominator(this); + } + other->dominated_blocks_.Reset(); + other->dominator_ = nullptr; + + // Clear the list of predecessors of `other` in preparation of deleting it. + other->predecessors_.Reset(); + + // Delete `other` from the graph. The function updates reverse post order. + graph_->DeleteDeadBlock(other); + other->SetGraph(nullptr); +} + +void HBasicBlock::MergeWithInlined(HBasicBlock* other) { + DCHECK_NE(GetGraph(), other->GetGraph()); + DCHECK(GetDominatedBlocks().IsEmpty()); + DCHECK(GetSuccessors().IsEmpty()); + DCHECK(!EndsWithControlFlowInstruction()); + DCHECK_EQ(other->GetPredecessors().Size(), 1u); + DCHECK(other->GetPredecessors().Get(0)->IsEntryBlock()); + DCHECK(other->GetPhis().IsEmpty()); + DCHECK(!other->IsInLoop()); + + // Move instructions from `other` to `this`. instructions_.Add(other->GetInstructions()); - other->GetInstructions().SetBlockOfInstructions(this); + other->instructions_.SetBlockOfInstructions(this); - while (!other->GetSuccessors().IsEmpty()) { - HBasicBlock* successor = other->GetSuccessors().Get(0); + // Update links to the successors of `other`. + successors_.Reset(); + while (!other->successors_.IsEmpty()) { + HBasicBlock* successor = other->successors_.Get(0); successor->ReplacePredecessor(other, this); } + // Update the dominator tree. 
for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) { HBasicBlock* dominated = other->GetDominatedBlocks().Get(i); dominated_blocks_.Add(dominated); @@ -961,6 +1114,24 @@ static void MakeRoomFor(GrowableArray<HBasicBlock*>* blocks, } } +void HGraph::DeleteDeadBlock(HBasicBlock* block) { + DCHECK_EQ(block->GetGraph(), this); + DCHECK(block->GetSuccessors().IsEmpty()); + DCHECK(block->GetPredecessors().IsEmpty()); + DCHECK(block->GetDominatedBlocks().IsEmpty()); + DCHECK(block->GetDominator() == nullptr); + + for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + block->RemoveInstruction(it.Current()); + } + for (HBackwardInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + block->RemovePhi(it.Current()->AsPhi()); + } + + reverse_post_order_.Delete(block); + blocks_.Put(block->GetBlockId(), nullptr); +} + void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { if (GetBlocks().Size() == 3) { // Simple case of an entry block, a body block, and an exit block. @@ -993,7 +1164,7 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { HBasicBlock* first = entry_block_->GetSuccessors().Get(0); DCHECK(!first->IsInLoop()); - at->MergeWith(first); + at->MergeWithInlined(first); exit_block_->ReplaceWith(to); // Update all predecessors of the exit block (now the `to` block) @@ -1064,8 +1235,10 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { outer_graph->AddBlock(current); outer_graph->reverse_post_order_.Put(++index_of_at, current); if (info != nullptr) { - info->Add(current); current->SetLoopInformation(info); + for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { + loop_it.Current()->Add(current); + } } } } @@ -1075,8 +1248,10 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { outer_graph->AddBlock(to); outer_graph->reverse_post_order_.Put(++index_of_at, to); if (info != nullptr) { - info->Add(to); to->SetLoopInformation(info); + for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { + loop_it.Current()->Add(to); + } if (info->IsBackEdge(*at)) { // Only `at` can become a back edge, as the inlined blocks // are predecessors of `at`. @@ -1097,7 +1272,7 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // - Remove suspend checks, that hold an environment. // We must do this after the other blocks have been inlined, otherwise ids of // constants could overlap with the inner graph. - int parameter_index = 0; + size_t parameter_index = 0; for (HInstructionIterator it(entry_block_->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); if (current->IsNullConstant()) { @@ -1110,6 +1285,14 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // TODO: Don't duplicate floating-point constants. current->MoveBefore(outer_graph->GetEntryBlock()->GetLastInstruction()); } else if (current->IsParameterValue()) { + if (kIsDebugBuild + && invoke->IsInvokeStaticOrDirect() + && invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) { + // Ensure we do not use the last input of `invoke`, as it + // contains a clinit check which is not an actual argument. 
+ size_t last_input_index = invoke->InputCount() - 1; + DCHECK(parameter_index != last_input_index); + } current->ReplaceWith(invoke->InputAt(parameter_index++)); } else { DCHECK(current->IsGoto() || current->IsSuspendCheck()); @@ -1121,53 +1304,6 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { invoke->GetBlock()->RemoveInstruction(invoke); } -void HGraph::MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block) { - // Find the two branches of an If. - DCHECK_EQ(start_block->GetSuccessors().Size(), 2u); - HBasicBlock* left_branch = start_block->GetSuccessors().Get(0); - HBasicBlock* right_branch = start_block->GetSuccessors().Get(1); - - // Make sure this is a diamond control-flow path. - DCHECK_EQ(left_branch->GetSuccessors().Get(0), end_block); - DCHECK_EQ(right_branch->GetSuccessors().Get(0), end_block); - DCHECK_EQ(end_block->GetPredecessors().Size(), 2u); - DCHECK_EQ(start_block, end_block->GetDominator()); - - // Disconnect the branches and merge the two blocks. This will move - // all instructions from 'end_block' to 'start_block'. - DCHECK(left_branch->IsSingleGoto()); - DCHECK(right_branch->IsSingleGoto()); - left_branch->DisconnectFromAll(); - right_branch->DisconnectFromAll(); - start_block->RemoveInstruction(start_block->GetLastInstruction()); - start_block->MergeWith(end_block); - - // Delete the now redundant blocks from the graph. - blocks_.Put(left_branch->GetBlockId(), nullptr); - blocks_.Put(right_branch->GetBlockId(), nullptr); - blocks_.Put(end_block->GetBlockId(), nullptr); - - // Update reverse post order. - reverse_post_order_.Delete(left_branch); - reverse_post_order_.Delete(right_branch); - reverse_post_order_.Delete(end_block); - - // Update loops which contain the code. - for (HLoopInformationOutwardIterator it(*start_block); !it.Done(); it.Advance()) { - HLoopInformation* loop_info = it.Current(); - DCHECK(loop_info->Contains(*left_branch)); - DCHECK(loop_info->Contains(*right_branch)); - DCHECK(loop_info->Contains(*end_block)); - loop_info->Remove(left_branch); - loop_info->Remove(right_branch); - loop_info->Remove(end_block); - if (loop_info->IsBackEdge(*end_block)) { - loop_info->RemoveBackEdge(end_block); - loop_info->AddBackEdge(start_block); - } - } -} - std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) { ScopedObjectAccess soa(Thread::Current()); os << "[" diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index d9d15c4b18..938d6fcd64 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -75,6 +75,10 @@ class HInstructionList { void AddInstruction(HInstruction* instruction); void RemoveInstruction(HInstruction* instruction); + // Insert `instruction` before/after an existing instruction `cursor`. + void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); + void InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor); + // Return true if this list contains `instruction`. bool Contains(HInstruction* instruction) const; @@ -93,6 +97,9 @@ class HInstructionList { void AddAfter(HInstruction* cursor, const HInstructionList& instruction_list); void Add(const HInstructionList& instruction_list); + // Return the number of instructions in the list. This is an expensive operation. 
+ size_t CountSize() const; + private: HInstruction* first_instruction_; HInstruction* last_instruction_; @@ -120,7 +127,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { number_of_vregs_(0), number_of_in_vregs_(0), temporaries_vreg_slots_(0), - has_array_accesses_(false), + has_bounds_checks_(false), debuggable_(debuggable), current_instruction_id_(start_instruction_id), cached_null_constant_(nullptr), @@ -164,7 +171,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // Inline this graph in `outer_graph`, replacing the given `invoke` instruction. void InlineInto(HGraph* outer_graph, HInvoke* invoke); - void MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block); + // Removes `block` from the graph. + void DeleteDeadBlock(HBasicBlock* block); void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor); void SimplifyLoop(HBasicBlock* header); @@ -222,12 +230,12 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { return linear_order_; } - bool HasArrayAccesses() const { - return has_array_accesses_; + bool HasBoundsChecks() const { + return has_bounds_checks_; } - void SetHasArrayAccesses(bool value) { - has_array_accesses_ = value; + void SetHasBoundsChecks(bool value) { + has_bounds_checks_ = value; } bool IsDebuggable() const { return debuggable_; } @@ -244,8 +252,9 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { return CreateConstant(value, &cached_long_constants_); } - private: HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const; + + private: void VisitBlockForDominatorTree(HBasicBlock* block, HBasicBlock* predecessor, GrowableArray<size_t>* visits); @@ -286,8 +295,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // Number of vreg size slots that the temporaries use (used in baseline compiler). size_t temporaries_vreg_slots_; - // Has array accesses. We can totally skip BCE if it's false. - bool has_array_accesses_; + // Has bounds checks. We can totally skip BCE if it's false. + bool has_bounds_checks_; // Indicates whether the graph should be compiled in a way that // ensures full debuggability. 
If false, we can apply more @@ -447,6 +456,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { HBasicBlock* GetDominator() const { return dominator_; } void SetDominator(HBasicBlock* dominator) { dominator_ = dominator; } void AddDominatedBlock(HBasicBlock* block) { dominated_blocks_.Add(block); } + void RemoveDominatedBlock(HBasicBlock* block) { dominated_blocks_.Delete(block); } void ReplaceDominatedBlock(HBasicBlock* existing, HBasicBlock* new_block) { for (size_t i = 0, e = dominated_blocks_.Size(); i < e; ++i) { if (dominated_blocks_.Get(i) == existing) { @@ -467,8 +477,9 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { HInstruction* GetFirstInstruction() const { return instructions_.first_instruction_; } HInstruction* GetLastInstruction() const { return instructions_.last_instruction_; } const HInstructionList& GetInstructions() const { return instructions_; } - const HInstructionList& GetPhis() const { return phis_; } HInstruction* GetFirstPhi() const { return phis_.first_instruction_; } + HInstruction* GetLastPhi() const { return phis_.last_instruction_; } + const HInstructionList& GetPhis() const { return phis_; } void AddSuccessor(HBasicBlock* block) { successors_.Add(block); @@ -545,7 +556,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { // that this method does not update the graph, reverse post order, loop // information, nor make sure the blocks are consistent (for example ending // with a control flow instruction). - void MergeWith(HBasicBlock* other); + void MergeWithInlined(HBasicBlock* other); // Replace `this` with `other`. Predecessors, successors, and dominated blocks // of `this` are moved to `other`. @@ -554,12 +565,17 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { // with a control flow instruction). void ReplaceWith(HBasicBlock* other); - // Disconnects `this` from all its predecessors, successors and the dominator. - // It assumes that `this` does not dominate any blocks. - // Note that this method does not update the graph, reverse post order, loop - // information, nor make sure the blocks are consistent (for example ending - // with a control flow instruction). - void DisconnectFromAll(); + // Merge `other` at the end of `this`. This method updates loops, reverse post + // order, links to predecessors, successors, dominators and deletes the block + // from the graph. The two blocks must be successive, i.e. `this` the only + // predecessor of `other` and vice versa. + void MergeWith(HBasicBlock* other); + + // Disconnects `this` from all its predecessors, successors and dominator, + // removes it from all loops it is included in and eventually from the graph. + // The block must not dominate any other block. Predecessors and successors + // are safely updated. + void DisconnectAndDelete(); void AddInstruction(HInstruction* instruction); void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); @@ -1149,8 +1165,6 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { virtual bool CanThrow() const { return false; } bool HasSideEffects() const { return side_effects_.HasSideEffects(); } - virtual bool ActAsNullConstant() const { return false; } - // Does not apply for all instructions, but having this at top level greatly // simplifies the null check elimination. 
virtual bool CanBeNull() const { @@ -1158,7 +1172,10 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { return true; } - virtual bool CanDoImplicitNullCheck() const { return false; } + virtual bool CanDoImplicitNullCheckOn(HInstruction* obj) const { + UNUSED(obj); + return false; + } void SetReferenceTypeInfo(ReferenceTypeInfo reference_type_info) { DCHECK_EQ(GetType(), Primitive::kPrimNot); @@ -1626,7 +1643,7 @@ class HUnaryOperation : public HExpression<1> { // Try to statically evaluate `operation` and return a HConstant // containing the result of this evaluation. If `operation` cannot - // be evaluated as a constant, return nullptr. + // be evaluated as a constant, return null. HConstant* TryStaticEvaluation() const; // Apply this operation to `x`. @@ -1694,7 +1711,7 @@ class HBinaryOperation : public HExpression<2> { // Try to statically evaluate `operation` and return a HConstant // containing the result of this evaluation. If `operation` cannot - // be evaluated as a constant, return nullptr. + // be evaluated as a constant, return null. HConstant* TryStaticEvaluation() const; // Apply this operation to `x` and `y`. @@ -1702,11 +1719,11 @@ class HBinaryOperation : public HExpression<2> { virtual int64_t Evaluate(int64_t x, int64_t y) const = 0; // Returns an input that can legally be used as the right input and is - // constant, or nullptr. + // constant, or null. HConstant* GetConstantRight() const; // If `GetConstantRight()` returns one of the input, this returns the other - // one. Otherwise it returns nullptr. + // one. Otherwise it returns null. HInstruction* GetLeastConstantLeft() const; DECLARE_INSTRUCTION(BinaryOperation); @@ -2072,8 +2089,6 @@ class HNullConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return 0; } - bool ActAsNullConstant() const OVERRIDE { return true; } - DECLARE_INSTRUCTION(NullConstant); private: @@ -2095,11 +2110,6 @@ class HIntConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return GetValue(); } - // TODO: Null is represented by the `0` constant. In most cases we replace it - // with a HNullConstant but we don't do it when comparing (a != null). This - // method is an workaround until we fix the above. - bool ActAsNullConstant() const OVERRIDE { return value_ == 0; } - bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } bool IsZero() const OVERRIDE { return GetValue() == 0; } bool IsOne() const OVERRIDE { return GetValue() == 1; } @@ -2170,7 +2180,7 @@ class HInvoke : public HInstruction { uint32_t GetDexMethodIndex() const { return dex_method_index_; } - Intrinsics GetIntrinsic() { + Intrinsics GetIntrinsic() const { return intrinsic_; } @@ -2212,6 +2222,14 @@ class HInvoke : public HInstruction { class HInvokeStaticOrDirect : public HInvoke { public: + // Requirements of this method call regarding the class + // initialization (clinit) check of its declaring class. + enum class ClinitCheckRequirement { + kNone, // Class already initialized. + kExplicit, // Static call having explicit clinit check as last input. + kImplicit, // Static call implicitly requiring a clinit check. 
+ }; + HInvokeStaticOrDirect(ArenaAllocator* arena, uint32_t number_of_arguments, Primitive::Type return_type, @@ -2219,13 +2237,16 @@ class HInvokeStaticOrDirect : public HInvoke { uint32_t dex_method_index, bool is_recursive, InvokeType original_invoke_type, - InvokeType invoke_type) + InvokeType invoke_type, + ClinitCheckRequirement clinit_check_requirement) : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), original_invoke_type_(original_invoke_type), invoke_type_(invoke_type), - is_recursive_(is_recursive) {} + is_recursive_(is_recursive), + clinit_check_requirement_(clinit_check_requirement) {} - bool CanDoImplicitNullCheck() const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + UNUSED(obj); // We access the method via the dex cache so we can't do an implicit null check. // TODO: for intrinsics we can generate implicit null checks. return false; @@ -2236,12 +2257,60 @@ class HInvokeStaticOrDirect : public HInvoke { bool IsRecursive() const { return is_recursive_; } bool NeedsDexCache() const OVERRIDE { return !IsRecursive(); } + // Is this instruction a call to a static method? + bool IsStatic() const { + return GetInvokeType() == kStatic; + } + + // Remove the art::HClinitCheck or art::HLoadClass instruction as + // last input (only relevant for static calls with explicit clinit + // check). + void RemoveClinitCheckOrLoadClassAsLastInput() { + DCHECK(IsStaticWithExplicitClinitCheck()); + size_t last_input_index = InputCount() - 1; + HInstruction* last_input = InputAt(last_input_index); + DCHECK(last_input != nullptr); + DCHECK(last_input->IsClinitCheck() || last_input->IsLoadClass()) << last_input->DebugName(); + RemoveAsUserOfInput(last_input_index); + inputs_.DeleteAt(last_input_index); + clinit_check_requirement_ = ClinitCheckRequirement::kImplicit; + DCHECK(IsStaticWithImplicitClinitCheck()); + } + + // Is this a call to a static method whose declaring class has an + // explicit initialization check in the graph? + bool IsStaticWithExplicitClinitCheck() const { + return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kExplicit); + } + + // Is this a call to a static method whose declaring class has an + // implicit initialization check requirement? + bool IsStaticWithImplicitClinitCheck() const { + return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kImplicit); + } + DECLARE_INSTRUCTION(InvokeStaticOrDirect); + protected: + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { + const HUserRecord<HInstruction*> input_record = HInvoke::InputRecordAt(i); + if (kIsDebugBuild && IsStaticWithExplicitClinitCheck() && (i == InputCount() - 1)) { + HInstruction* input = input_record.GetInstruction(); + // `input` is the last input of a static invoke marked as having + // an explicit clinit check. It must either be: + // - an art::HClinitCheck instruction, set by art::HGraphBuilder; or + // - an art::HLoadClass instruction, set by art::PrepareForRegisterAllocation.
+ DCHECK(input != nullptr); + DCHECK(input->IsClinitCheck() || input->IsLoadClass()) << input->DebugName(); + } + return input_record; + } + private: const InvokeType original_invoke_type_; const InvokeType invoke_type_; const bool is_recursive_; + ClinitCheckRequirement clinit_check_requirement_; DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect); }; @@ -2257,9 +2326,9 @@ class HInvokeVirtual : public HInvoke { : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), vtable_index_(vtable_index) {} - bool CanDoImplicitNullCheck() const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { // TODO: Add implicit null checks in intrinsics. - return !GetLocations()->Intrinsified(); + return (obj == InputAt(0)) && !GetLocations()->Intrinsified(); } uint32_t GetVTableIndex() const { return vtable_index_; } @@ -2283,9 +2352,9 @@ class HInvokeInterface : public HInvoke { : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), imt_index_(imt_index) {} - bool CanDoImplicitNullCheck() const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { // TODO: Add implicit null checks in intrinsics. - return !GetLocations()->Intrinsified(); + return (obj == InputAt(0)) && !GetLocations()->Intrinsified(); } uint32_t GetImtIndex() const { return imt_index_; } @@ -2746,6 +2815,7 @@ class HPhi : public HInstruction { size_t InputCount() const OVERRIDE { return inputs_.Size(); } void AddInput(HInstruction* input); + void RemoveInputAt(size_t index); Primitive::Type GetType() const OVERRIDE { return type_; } void SetType(Primitive::Type type) { type_ = type; } @@ -2855,8 +2925,8 @@ class HInstanceFieldGet : public HExpression<1> { return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } - bool CanDoImplicitNullCheck() const OVERRIDE { - return GetFieldOffset().Uint32Value() < kPageSize; + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize; } size_t ComputeHashCode() const OVERRIDE { @@ -2889,8 +2959,8 @@ class HInstanceFieldSet : public HTemplateInstruction<2> { SetRawInputAt(1, value); } - bool CanDoImplicitNullCheck() const OVERRIDE { - return GetFieldOffset().Uint32Value() < kPageSize; + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize; } const FieldInfo& GetFieldInfo() const { return field_info_; } @@ -2920,7 +2990,8 @@ class HArrayGet : public HExpression<2> { UNUSED(other); return true; } - bool CanDoImplicitNullCheck() const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + UNUSED(obj); // TODO: We can be smarter here. // Currently, the array access is always preceded by an ArrayLength or a NullCheck // which generates the implicit null check. There are cases when these can be removed @@ -2962,7 +3033,8 @@ class HArraySet : public HTemplateInstruction<3> { return needs_type_check_; } - bool CanDoImplicitNullCheck() const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + UNUSED(obj); // TODO: Same as for ArrayGet. 
return false; } @@ -3014,7 +3086,9 @@ class HArrayLength : public HExpression<1> { UNUSED(other); return true; } - bool CanDoImplicitNullCheck() const OVERRIDE { return true; } + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + return obj == InputAt(0); + } DECLARE_INSTRUCTION(ArrayLength); @@ -3210,7 +3284,6 @@ class HLoadString : public HExpression<0> { DISALLOW_COPY_AND_ASSIGN(HLoadString); }; -// TODO: Pass this check to HInvokeStaticOrDirect nodes. /** * Performs an initialization check on its Class object input. */ @@ -3351,6 +3424,7 @@ class HInstanceOf : public HExpression<2> { uint32_t dex_pc) : HExpression(Primitive::kPrimBoolean, SideEffects::None()), class_is_final_(class_is_final), + must_do_null_check_(true), dex_pc_(dex_pc) { SetRawInputAt(0, object); SetRawInputAt(1, constant); @@ -3370,10 +3444,15 @@ class HInstanceOf : public HExpression<2> { bool IsClassFinal() const { return class_is_final_; } + // Used only in code generation. + bool MustDoNullCheck() const { return must_do_null_check_; } + void ClearMustDoNullCheck() { must_do_null_check_ = false; } + DECLARE_INSTRUCTION(InstanceOf); private: const bool class_is_final_; + bool must_do_null_check_; const uint32_t dex_pc_; DISALLOW_COPY_AND_ASSIGN(HInstanceOf); @@ -3414,6 +3493,7 @@ class HCheckCast : public HTemplateInstruction<2> { uint32_t dex_pc) : HTemplateInstruction(SideEffects::None()), class_is_final_(class_is_final), + must_do_null_check_(true), dex_pc_(dex_pc) { SetRawInputAt(0, object); SetRawInputAt(1, constant); @@ -3432,6 +3512,9 @@ class HCheckCast : public HTemplateInstruction<2> { bool CanThrow() const OVERRIDE { return true; } + bool MustDoNullCheck() const { return must_do_null_check_; } + void ClearMustDoNullCheck() { must_do_null_check_ = false; } + uint32_t GetDexPc() const { return dex_pc_; } bool IsClassFinal() const { return class_is_final_; } @@ -3440,6 +3523,7 @@ class HCheckCast : public HTemplateInstruction<2> { private: const bool class_is_final_; + bool must_do_null_check_; const uint32_t dex_pc_; DISALLOW_COPY_AND_ASSIGN(HCheckCast); diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index b13e07eb22..c46a21955c 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -21,9 +21,9 @@ namespace art { -void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat) const { +void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const { if (stats_ != nullptr) { - stats_->RecordStat(compilation_stat); + stats_->RecordStat(compilation_stat, count); } } diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index 8b2028177b..ccf8de9f6a 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -48,7 +48,7 @@ class HOptimization : public ValueObject { void Check(); protected: - void MaybeRecordStat(MethodCompilationStat compilation_stat) const; + void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const; HGraph* const graph_; // Used to record stats about the optimization. 
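The widened HOptimization::MaybeRecordStat(stat, count) above lets a pass report several events of one kind with a single call instead of one call per event. Below is a minimal sketch of how a pass deriving from HOptimization might use it; the pass name HExampleCleanup and its removal test are illustrative assumptions, while MaybeRecordStat, MethodCompilationStat::kRemovedDeadInstruction and the graph iteration helpers come from the surrounding code.

  // Hypothetical pass body (sketch only): count removals locally, then record
  // them in bulk through the widened MaybeRecordStat. Assumes HExampleCleanup
  // derives from HOptimization, so graph_ and MaybeRecordStat are accessible.
  void HExampleCleanup::Run() {
    size_t removed = 0;
    for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
      HBasicBlock* block = it.Current();
      for (HBackwardInstructionIterator inst_it(block->GetInstructions());
           !inst_it.Done();
           inst_it.Advance()) {
        HInstruction* instruction = inst_it.Current();
        // Treat an instruction as removable only if nothing uses it and
        // removing it cannot change observable behaviour.
        if (!instruction->HasUses()
            && !instruction->HasSideEffects()
            && !instruction->CanThrow()
            && !instruction->IsSuspendCheck()
            && !instruction->IsControlFlow()) {
          block->RemoveInstruction(instruction);
          ++removed;
        }
      }
    }
    // One call records every removal at once.
    MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction, removed);
  }

This is presumably the pattern the dead_code_elimination passes set up below use to feed the new kRemovedDeadInstruction counter into OptimizingCompilerStats in bulk.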
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index a17d6e1822..05451bcaa6 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -208,6 +208,12 @@ class OptimizingCompiler FINAL : public Compiler { void UnInit() const OVERRIDE; + void MaybeRecordStat(MethodCompilationStat compilation_stat) const { + if (compilation_stats_.get() != nullptr) { + compilation_stats_->RecordStat(compilation_stat); + } + } + private: // Whether we should run any optimization or register allocation. If false, will // just run the code generation after the graph was built. @@ -226,7 +232,7 @@ class OptimizingCompiler FINAL : public Compiler { CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit) const; - mutable OptimizingCompilerStats compilation_stats_; + std::unique_ptr<OptimizingCompilerStats> compilation_stats_; std::unique_ptr<std::ostream> visualizer_output_; @@ -243,7 +249,6 @@ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) run_optimizations_( (driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) && !driver->GetCompilerOptions().GetDebuggable()), - compilation_stats_(), delegate_(Create(driver, Compiler::Kind::kQuick)) {} void OptimizingCompiler::Init() { @@ -258,6 +263,9 @@ void OptimizingCompiler::Init() { << "Invoke the compiler with '-j1'."; visualizer_output_.reset(new std::ofstream(cfg_file_name)); } + if (driver->GetDumpStats()) { + compilation_stats_.reset(new OptimizingCompilerStats()); + } } void OptimizingCompiler::UnInit() const { @@ -265,7 +273,9 @@ void OptimizingCompiler::UnInit() const { } OptimizingCompiler::~OptimizingCompiler() { - compilation_stats_.Log(); + if (compilation_stats_.get() != nullptr) { + compilation_stats_->Log(); + } } void OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu) const { @@ -310,14 +320,15 @@ static void RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { - HDeadCodeElimination dce(graph); + HDeadCodeElimination dce1(graph, stats); + HDeadCodeElimination dce2(graph, stats, "dead_code_elimination_final"); HConstantFolding fold1(graph); InstructionSimplifier simplify1(graph, stats); - HBooleanSimplifier boolean_not(graph); + HBooleanSimplifier boolean_simplify(graph); HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, stats); - HConstantFolding fold2(graph); + HConstantFolding fold2(graph, "constant_folding_after_inlining"); SideEffectsAnalysis side_effects(graph); GVNOptimization gvn(graph, side_effects); LICM licm(graph, side_effects); @@ -329,20 +340,21 @@ static void RunOptimizations(HGraph* graph, HOptimization* optimizations[] = { &intrinsics, - &dce, + &dce1, &fold1, &simplify1, + &inliner, // BooleanSimplifier depends on the InstructionSimplifier removing redundant // suspend checks to recognize empty blocks. 
- &boolean_not, - &inliner, + &boolean_simplify, &fold2, &side_effects, &gvn, &licm, &bce, &type_propagation, - &simplify2 + &simplify2, + &dce2, }; RunOptimizations(optimizations, arraysize(optimizations), pass_info_printer); @@ -381,7 +393,7 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer) const { StackHandleScopeCollection handles(Thread::Current()); - RunOptimizations(graph, compiler_driver, &compilation_stats_, + RunOptimizations(graph, compiler_driver, compilation_stats_.get(), dex_file, dex_compilation_unit, pass_info_printer, &handles); AllocateRegisters(graph, codegen, pass_info_printer); @@ -397,7 +409,7 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, std::vector<uint8_t> stack_map; codegen->BuildStackMaps(&stack_map); - compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized); + MaybeRecordStat(MethodCompilationStat::kCompiledOptimized); return CompiledMethod::SwapAllocCompiledMethod( compiler_driver, @@ -435,7 +447,7 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( std::vector<uint8_t> gc_map; codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); - compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline); + MaybeRecordStat(MethodCompilationStat::kCompiledBaseline); return CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), @@ -463,7 +475,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite const DexFile& dex_file) const { UNUSED(invoke_type); std::string method_name = PrettyMethod(method_idx, dex_file); - compilation_stats_.RecordStat(MethodCompilationStat::kAttemptCompilation); + MaybeRecordStat(MethodCompilationStat::kAttemptCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); InstructionSet instruction_set = compiler_driver->GetInstructionSet(); // Always use the thumb2 assembler: some runtime functionality (like implicit stack @@ -474,12 +486,12 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite // Do not attempt to compile on architectures we do not support. 
if (!IsInstructionSetSupported(instruction_set)) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledUnsupportedIsa); + MaybeRecordStat(MethodCompilationStat::kNotCompiledUnsupportedIsa); return nullptr; } if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledPathological); + MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological); return nullptr; } @@ -489,7 +501,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions(); if ((compiler_options.GetCompilerFilter() == CompilerOptions::kSpace) && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledSpaceFilter); + MaybeRecordStat(MethodCompilationStat::kNotCompiledSpaceFilter); return nullptr; } @@ -514,7 +526,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite compiler_driver->GetCompilerOptions())); if (codegen.get() == nullptr) { CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler"; - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen); + MaybeRecordStat(MethodCompilationStat::kNotCompiledNoCodegen); return nullptr; } codegen->GetAssembler()->cfi().SetEnabled( @@ -531,7 +543,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite &dex_compilation_unit, &dex_file, compiler_driver, - &compilation_stats_); + compilation_stats_.get()); VLOG(compiler) << "Building " << method_name; @@ -558,7 +570,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite if (!graph->TryBuildingSsa()) { // We could not transform the graph to SSA, bailout. 
LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop"; - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); + MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); return nullptr; } } @@ -576,11 +588,11 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite VLOG(compiler) << "Compile baseline " << method_name; if (!run_optimizations_) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedDisabled); + MaybeRecordStat(MethodCompilationStat::kNotOptimizedDisabled); } else if (!can_optimize) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedTryCatch); + MaybeRecordStat(MethodCompilationStat::kNotOptimizedTryCatch); } else if (!can_allocate_registers) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator); + MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator); } return CompileBaseline(codegen.get(), compiler_driver, dex_compilation_unit); @@ -603,9 +615,9 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, method_idx, jclass_loader, dex_file); } else { if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime); + MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime); } else { - compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledClassNotVerified); + MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified); } } @@ -616,7 +628,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, jclass_loader, dex_file); if (method != nullptr) { - compilation_stats_.RecordStat(MethodCompilationStat::kCompiledQuick); + MaybeRecordStat(MethodCompilationStat::kCompiledQuick); } return method; } diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index d4a936d1c3..65c84e6942 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -29,6 +29,7 @@ enum MethodCompilationStat { kCompiledBaseline, kCompiledOptimized, kCompiledQuick, + kInstructionSimplifications, kInlinedInvoke, kNotCompiledUnsupportedIsa, kNotCompiledPathological, @@ -48,8 +49,8 @@ enum MethodCompilationStat { kNotCompiledVerifyAtRuntime, kNotCompiledClassNotVerified, kRemovedCheckedCast, + kRemovedDeadInstruction, kRemovedNullCheck, - kInstructionSimplifications, kLastStat }; @@ -57,8 +58,8 @@ class OptimizingCompilerStats { public: OptimizingCompilerStats() {} - void RecordStat(MethodCompilationStat stat) { - compile_stats_[stat]++; + void RecordStat(MethodCompilationStat stat, size_t count = 1) { + compile_stats_[stat] += count; } void Log() const { @@ -82,7 +83,7 @@ class OptimizingCompilerStats { for (int i = 0; i < kLastStat; i++) { if (compile_stats_[i] != 0) { - VLOG(compiler) << PrintMethodCompilationStat(i) << ": " << compile_stats_[i]; + LOG(INFO) << PrintMethodCompilationStat(i) << ": " << compile_stats_[i]; } } } @@ -96,6 +97,7 @@ class OptimizingCompilerStats { case kCompiledOptimized : return "kCompiledOptimized"; case kCompiledQuick : return "kCompiledQuick"; case kInlinedInvoke : return "kInlinedInvoke"; + case kInstructionSimplifications: return "kInstructionSimplifications"; case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; case kNotCompiledPathological : return "kNotCompiledPathological"; case 
kNotCompiledHugeMethod : return "kNotCompiledHugeMethod"; @@ -114,8 +116,8 @@ class OptimizingCompilerStats { case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime"; case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; case kRemovedCheckedCast: return "kRemovedCheckedCast"; + case kRemovedDeadInstruction: return "kRemovedDeadInstruction"; case kRemovedNullCheck: return "kRemovedNullCheck"; - case kInstructionSimplifications: return "kInstructionSimplifications"; default: LOG(FATAL) << "invalid stat"; } return ""; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index f5d8d82571..fa6b3c292c 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -79,4 +79,26 @@ void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { } } +void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + if (invoke->IsStaticWithExplicitClinitCheck()) { + size_t last_input_index = invoke->InputCount() - 1; + HInstruction* last_input = invoke->InputAt(last_input_index); + DCHECK(last_input->IsLoadClass()) << last_input->DebugName(); + + // Remove a load class instruction as last input of a static + // invoke, which has been added (along with a clinit check, + // removed by PrepareForRegisterAllocation::VisitClinitCheck + // previously) by the graph builder during the creation of the + // static invoke instruction, but is no longer required at this + // stage (i.e., after inlining has been performed). + invoke->RemoveClinitCheckOrLoadClassAsLastInput(); + + // If the load class instruction is no longer used, remove it from + // the graph. 
+ if (!last_input->HasUses()) { + last_input->GetBlock()->RemoveInstruction(last_input); + } + } +} + } // namespace art diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index c28507c925..d7f277fa0d 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -39,6 +39,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitBoundType(HBoundType* bound_type) OVERRIDE; void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation); }; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 31ddbb70a7..12b1c2b9bd 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -58,36 +58,40 @@ void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { } void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) { - HInstruction* lastInstruction = block->GetLastInstruction(); - if (!lastInstruction->IsIf()) { + HIf* ifInstruction = block->GetLastInstruction()->AsIf(); + if (ifInstruction == nullptr) { return; } - HInstruction* ifInput = lastInstruction->InputAt(0); + HInstruction* ifInput = ifInstruction->InputAt(0); if (!ifInput->IsNotEqual() && !ifInput->IsEqual()) { return; } HInstruction* input0 = ifInput->InputAt(0); HInstruction* input1 = ifInput->InputAt(1); - HInstruction* obj; + HInstruction* obj = nullptr; - if ((input0->GetType() == Primitive::kPrimNot) && input1->ActAsNullConstant()) { + if (input1->IsNullConstant()) { obj = input0; - } else if ((input1->GetType() == Primitive::kPrimNot) && input0->ActAsNullConstant()) { + } else if (input0->IsNullConstant()) { obj = input1; } else { return; } - HBoundType* bound_type = - new (graph_->GetArena()) HBoundType(obj, ReferenceTypeInfo::CreateTop(false)); - - block->InsertInstructionBefore(bound_type, lastInstruction); + // We only need to bound the type if we have uses in the relevant block. + // So start with null and create the HBoundType lazily, only if it's needed. + HBoundType* bound_type = nullptr; HBasicBlock* notNullBlock = ifInput->IsNotEqual() - ? lastInstruction->AsIf()->IfTrueSuccessor() - : lastInstruction->AsIf()->IfFalseSuccessor(); + ? ifInstruction->IfTrueSuccessor() + : ifInstruction->IfFalseSuccessor(); + for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) { HInstruction* user = it.Current()->GetUser(); if (notNullBlock->Dominates(user->GetBlock())) { + if (bound_type == nullptr) { + bound_type = new (graph_->GetArena()) HBoundType(obj, ReferenceTypeInfo::CreateTop(false)); + notNullBlock->InsertInstructionBefore(bound_type, notNullBlock->GetFirstInstruction()); + } user->ReplaceInput(bound_type, it.Current()->GetIndex()); } } @@ -98,47 +102,58 @@ void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) { // If that's the case insert an HBoundType instruction to bound the type of `x` // to `ClassX` in the scope of the dominated blocks. 
void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { - HInstruction* lastInstruction = block->GetLastInstruction(); - if (!lastInstruction->IsIf()) { + HIf* ifInstruction = block->GetLastInstruction()->AsIf(); + if (ifInstruction == nullptr) { return; } - - HInstruction* ifInput = lastInstruction->InputAt(0); - HInstruction* instanceOf; - HBasicBlock* instanceOfTrueBlock; + HInstruction* ifInput = ifInstruction->InputAt(0); + HInstruction* instanceOf = nullptr; + HBasicBlock* instanceOfTrueBlock = nullptr; + + // The instruction simplifier has transformed: + // - `if (a instanceof A)` into an HIf with an HInstanceOf input + // - `if (!(a instanceof A)` into an HIf with an HBooleanNot input (which in turn + // has an HInstanceOf input) + // So we should not see the usual HEqual here. if (ifInput->IsInstanceOf()) { instanceOf = ifInput; - instanceOfTrueBlock = lastInstruction->AsIf()->IfTrueSuccessor(); + instanceOfTrueBlock = ifInstruction->IfTrueSuccessor(); } else if (ifInput->IsBooleanNot() && ifInput->InputAt(0)->IsInstanceOf()) { instanceOf = ifInput->InputAt(0); - instanceOfTrueBlock = lastInstruction->AsIf()->IfFalseSuccessor(); + instanceOfTrueBlock = ifInstruction->IfFalseSuccessor(); } else { return; } - HInstruction* obj = instanceOf->InputAt(0); - HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); - - ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); - HBoundType* bound_type = new (graph_->GetArena()) HBoundType(obj, class_rti); - - // Narrow the type as much as possible. - { - ScopedObjectAccess soa(Thread::Current()); - if (!load_class->IsResolved() || class_rti.IsSupertypeOf(obj_rti)) { - bound_type->SetReferenceTypeInfo(obj_rti); - } else { - bound_type->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); - } - } - - block->InsertInstructionBefore(bound_type, lastInstruction); + // We only need to bound the type if we have uses in the relevant block. + // So start with null and create the HBoundType lazily, only if it's needed. + HBoundType* bound_type = nullptr; + HInstruction* obj = instanceOf->InputAt(0); for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) { HInstruction* user = it.Current()->GetUser(); if (instanceOfTrueBlock->Dominates(user->GetBlock())) { + if (bound_type == nullptr) { + HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); + + ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + bound_type = new (graph_->GetArena()) HBoundType(obj, class_rti); + + // Narrow the type as much as possible. 
+ { + ScopedObjectAccess soa(Thread::Current()); + if (!load_class->IsResolved() || class_rti.IsSupertypeOf(obj_rti)) { + bound_type->SetReferenceTypeInfo(obj_rti); + } else { + bound_type->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); + } + } + + instanceOfTrueBlock->InsertInstructionBefore( + bound_type, instanceOfTrueBlock->GetFirstInstruction()); + } user->ReplaceInput(bound_type, it.Current()->GetIndex()); } } diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 6350b35ca1..0fdf051957 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -378,7 +378,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { // Split just before first register use. size_t first_register_use = current->FirstRegisterUse(); if (first_register_use != kNoLifetime) { - LiveInterval* split = Split(current, first_register_use - 1); + LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1); // Don't add directly to `unhandled`, it needs to be sorted and the start // of this new interval might be after intervals already in the list. AddSorted(&unhandled, split); @@ -903,6 +903,10 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { return false; } + // We use the first use to compare with other intervals. If this interval + // is used after any active intervals, we will spill this interval. + size_t first_use = current->FirstUseAfter(current->GetStart()); + // First set all registers as not being used. size_t* next_use = registers_array_; for (size_t i = 0; i < number_of_registers_; ++i) { @@ -917,7 +921,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { if (active->IsFixed()) { next_use[active->GetRegister()] = current->GetStart(); } else { - size_t use = active->FirstRegisterUseAfter(current->GetStart()); + size_t use = active->FirstUseAfter(current->GetStart()); if (use != kNoLifetime) { next_use[active->GetRegister()] = use; } @@ -945,7 +949,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { next_use[inactive->GetRegister()] = std::min(next_intersection, next_use[inactive->GetRegister()]); } else { - size_t use = inactive->FirstRegisterUseAfter(current->GetStart()); + size_t use = inactive->FirstUseAfter(current->GetStart()); if (use != kNoLifetime) { next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]); } @@ -959,16 +963,16 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { DCHECK(current->IsHighInterval()); reg = current->GetRegister(); // When allocating the low part, we made sure the high register was available. - DCHECK_LT(first_register_use, next_use[reg]); + DCHECK_LT(first_use, next_use[reg]); } else if (current->IsLowInterval()) { reg = FindAvailableRegisterPair(next_use, first_register_use); // We should spill if both registers are not available. 
- should_spill = (first_register_use >= next_use[reg]) - || (first_register_use >= next_use[GetHighForLowRegister(reg)]); + should_spill = (first_use >= next_use[reg]) + || (first_use >= next_use[GetHighForLowRegister(reg)]); } else { DCHECK(!current->IsHighInterval()); reg = FindAvailableRegister(next_use); - should_spill = (first_register_use >= next_use[reg]); + should_spill = (first_use >= next_use[reg]); } DCHECK_NE(reg, kNoRegister); @@ -993,15 +997,17 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // If the first use of that instruction is after the last use of the found // register, we split this interval just before its first register use. AllocateSpillSlotFor(current); - LiveInterval* split = Split(current, first_register_use - 1); + LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1); if (current == split) { DumpInterval(std::cerr, current); DumpAllIntervals(std::cerr); // This situation has the potential to infinite loop, so we make it a non-debug CHECK. + HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2); CHECK(false) << "There is not enough registers available for " << split->GetParent()->GetDefinedBy()->DebugName() << " " << split->GetParent()->GetDefinedBy()->GetId() - << " at " << first_register_use - 1; + << " at " << first_register_use - 1 << " " + << (at == nullptr ? "" : at->DebugName()); } AddSorted(unhandled_, split); } @@ -1094,6 +1100,31 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter } } +LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t from, size_t to) { + HBasicBlock* block_from = liveness_.GetBlockFromPosition(from); + HBasicBlock* block_to = liveness_.GetBlockFromPosition(to); + DCHECK(block_from != nullptr); + DCHECK(block_to != nullptr); + + // Both locations are in the same block. We split at the given location. + if (block_from == block_to) { + return Split(interval, to); + } + + // If `to` is in a loop, find the outermost loop header which does not contain `from`. + for (HLoopInformationOutwardIterator it(*block_to); !it.Done(); it.Advance()) { + HBasicBlock* header = it.Current()->GetHeader(); + if (block_from->GetLifetimeStart() >= header->GetLifetimeStart()) { + break; + } + block_to = header; + } + + // Split at the start of the found block, to piggyback on existing moves + // due to resolution of non-linear control flow (see `ConnectSplitSiblings`). + return Split(interval, block_to->GetLifetimeStart()); +} + LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) { DCHECK_GE(position, interval->GetStart()); DCHECK(!interval->IsDeadAt(position)); diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 717be75533..dc9c708eea 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -86,8 +86,12 @@ class RegisterAllocator { // Add `interval` in the given sorted list. static void AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval); - // Split `interval` at the position `at`. The new interval starts at `at`. - LiveInterval* Split(LiveInterval* interval, size_t at); + // Split `interval` at the position `position`. The new interval starts at `position`. + LiveInterval* Split(LiveInterval* interval, size_t position); + + // Split `interval` at a position between `from` and `to`. The method will try + // to find an optimal split position.
+ LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to); // Returns whether `reg` is blocked by the code generator. bool IsBlocked(int reg) const; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 182cd0e833..8c6d904a4c 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -854,6 +854,10 @@ TEST(RegisterAllocatorTest, SpillInactive) { X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(graph, &codegen); + // Populate the instructions in the liveness object, to please the register allocator. + for (size_t i = 0; i < 32; ++i) { + liveness.instructions_from_lifetime_position_.Add(user); + } RegisterAllocator register_allocator(&allocator, &codegen, liveness); register_allocator.unhandled_core_intervals_.Add(fourth); diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 8eb98a186b..97254edb5e 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -131,6 +131,9 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { void Dump(std::ostream& stream) const { stream << position_; + if (is_environment_) { + stream << " (env)"; + } } UsePosition* Dup(ArenaAllocator* allocator) const { @@ -330,7 +333,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { } if (after_loop == nullptr) { // Uses are only in the loop. - first_range_ = last_range_ = range_search_start_ = new (allocator_) LiveRange(start, end, nullptr); + first_range_ = last_range_ = range_search_start_ = + new (allocator_) LiveRange(start, end, nullptr); } else if (after_loop->GetStart() <= end) { first_range_ = range_search_start_ = after_loop; // There are uses after the loop. @@ -366,6 +370,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { LiveInterval* GetParent() const { return parent_; } + // Returns whether this interval is the parent interval, that is, the interval + // that starts where the HInstruction is defined. + bool IsParent() const { return parent_ == this; } + LiveRange* GetFirstRange() const { return first_range_; } LiveRange* GetLastRange() const { return last_range_; } @@ -442,7 +450,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { if (is_temp_) { return position == GetStart() ? position : kNoLifetime; } - if (position == GetStart() && defined_by_ != nullptr) { + if (position == GetStart() && IsParent()) { LocationSummary* locations = defined_by_->GetLocations(); Location location = locations->Out(); // This interval is the first interval of the instruction. If the output @@ -491,12 +499,19 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return position == GetStart() ? 
position : kNoLifetime; } + if (position == GetStart() && IsParent()) { + if (defined_by_->GetLocations()->Out().IsValid()) { + return position; + } + } + UsePosition* use = first_use_; size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { if (!use->GetIsEnvironment()) { + Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex()); size_t use_position = use->GetPosition(); - if (use_position > position) { + if (use_position > position && location.IsValid()) { return use_position; } } @@ -582,7 +597,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { previous->next_ = nullptr; new_interval->first_range_ = current; if (range_search_start_ != nullptr && range_search_start_->GetEnd() >= current->GetEnd()) { - // Search start point is inside `new_interval`. Change it to nullptr + // Search start point is inside `new_interval`. Change it to null // (i.e. the end of the interval) in the original interval. range_search_start_ = nullptr; } @@ -725,7 +740,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { } void AddHighInterval(bool is_temp = false) { - DCHECK_EQ(GetParent(), this); + DCHECK(IsParent()); DCHECK(!HasHighInterval()); DCHECK(!HasLowInterval()); high_or_low_interval_ = new (allocator_) LiveInterval( @@ -849,7 +864,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { defined_by_(defined_by) {} // Searches for a LiveRange that either covers the given position or is the - // first next LiveRange. Returns nullptr if no such LiveRange exists. Ranges + // first next LiveRange. Returns null if no such LiveRange exists. Ranges // known to end before `position` can be skipped with `search_start`. LiveRange* FindRangeAtOrAfter(size_t position, LiveRange* search_start) const { if (kIsDebugBuild) { @@ -983,6 +998,15 @@ class SsaLivenessAnalysis : public ValueObject { return instructions_from_lifetime_position_.Get(index); } + HBasicBlock* GetBlockFromPosition(size_t index) const { + HInstruction* instruction = GetInstructionFromPosition(index / 2); + if (instruction == nullptr) { + // If we are at a block boundary, get the block following. + instruction = GetInstructionFromPosition((index / 2) + 1); + } + return instruction->GetBlock(); + } + HInstruction* GetTempUser(LiveInterval* temp) const { // A temporary shares the same lifetime start as the instruction that requires it. DCHECK(temp->IsTemp()); @@ -1053,6 +1077,8 @@ class SsaLivenessAnalysis : public ValueObject { GrowableArray<HInstruction*> instructions_from_lifetime_position_; size_t number_of_ssa_values_; + ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive); + DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis); }; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc new file mode 100644 index 0000000000..8344fc3237 --- /dev/null +++ b/compiler/optimizing/stack_map_stream.cc @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "stack_map_stream.h" + +namespace art { + +void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, + uint32_t native_pc_offset, + uint32_t register_mask, + BitVector* sp_mask, + uint32_t num_dex_registers, + uint8_t inlining_depth) { + DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry"; + current_entry_.dex_pc = dex_pc; + current_entry_.native_pc_offset = native_pc_offset; + current_entry_.register_mask = register_mask; + current_entry_.sp_mask = sp_mask; + current_entry_.num_dex_registers = num_dex_registers; + current_entry_.inlining_depth = inlining_depth; + current_entry_.dex_register_locations_start_index = dex_register_locations_.Size(); + current_entry_.inline_infos_start_index = inline_infos_.Size(); + current_entry_.dex_register_map_hash = 0; + current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound; + if (num_dex_registers != 0) { + current_entry_.live_dex_registers_mask = + new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true); + } else { + current_entry_.live_dex_registers_mask = nullptr; + } + + if (sp_mask != nullptr) { + stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet()); + } + if (inlining_depth > 0) { + number_of_stack_maps_with_inline_info_++; + } + + dex_pc_max_ = std::max(dex_pc_max_, dex_pc); + native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset); + register_mask_max_ = std::max(register_mask_max_, register_mask); +} + +void StackMapStream::EndStackMapEntry() { + current_entry_.same_dex_register_map_as_ = FindEntryWithTheSameDexMap(); + stack_maps_.Add(current_entry_); + current_entry_ = StackMapEntry(); +} + +void StackMapStream::AddDexRegisterEntry(uint16_t dex_register, + DexRegisterLocation::Kind kind, + int32_t value) { + DCHECK_LT(dex_register, current_entry_.num_dex_registers); + + if (kind != DexRegisterLocation::Kind::kNone) { + // Ensure we only use non-compressed location kind at this stage. + DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) + << DexRegisterLocation::PrettyDescriptor(kind); + DexRegisterLocation location(kind, value); + + // Look for Dex register `location` in the location catalog (using the + // companion hash map of locations to indices). Use its index if it + // is already in the location catalog. If not, insert it (in the + // location catalog and the hash map) and use the newly created index. + auto it = location_catalog_entries_indices_.Find(location); + if (it != location_catalog_entries_indices_.end()) { + // Retrieve the index from the hash map. + dex_register_locations_.Add(it->second); + } else { + // Create a new entry in the location catalog and the hash map. + size_t index = location_catalog_entries_.Size(); + location_catalog_entries_.Add(location); + dex_register_locations_.Add(index); + location_catalog_entries_indices_.Insert(std::make_pair(location, index)); + } + + current_entry_.live_dex_registers_mask->SetBit(dex_register); + current_entry_.dex_register_map_hash += + (1 << (dex_register % (sizeof(current_entry_.dex_register_map_hash) * kBitsPerByte))); + current_entry_.dex_register_map_hash += static_cast<uint32_t>(value); + current_entry_.dex_register_map_hash += static_cast<uint32_t>(kind); + } +} + +void StackMapStream::AddInlineInfoEntry(uint32_t method_index) { + InlineInfoEntry entry; + entry.method_index = method_index; + inline_infos_.Add(entry); +} + +size_t StackMapStream::PrepareForFillIn() { + int stack_mask_number_of_bits = stack_mask_max_ + 1; // Need room for max element too. 
+ stack_mask_size_ = RoundUp(stack_mask_number_of_bits, kBitsPerByte) / kBitsPerByte; + inline_info_size_ = ComputeInlineInfoSize(); + dex_register_maps_size_ = ComputeDexRegisterMapsSize(); + stack_maps_size_ = stack_maps_.Size() + * StackMap::ComputeStackMapSize(stack_mask_size_, + inline_info_size_, + dex_register_maps_size_, + dex_pc_max_, + native_pc_offset_max_, + register_mask_max_); + dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize(); + + // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned. + needed_size_ = CodeInfo::kFixedSize + + dex_register_location_catalog_size_ + + stack_maps_size_ + + dex_register_maps_size_ + + inline_info_size_; + + dex_register_location_catalog_start_ = CodeInfo::kFixedSize; + stack_maps_start_ = dex_register_location_catalog_start_ + dex_register_location_catalog_size_; + dex_register_maps_start_ = stack_maps_start_ + stack_maps_size_; + inline_infos_start_ = dex_register_maps_start_ + dex_register_maps_size_; + + return needed_size_; +} + +size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const { + size_t size = DexRegisterLocationCatalog::kFixedSize; + for (size_t location_catalog_entry_index = 0; + location_catalog_entry_index < location_catalog_entries_.Size(); + ++location_catalog_entry_index) { + DexRegisterLocation dex_register_location = + location_catalog_entries_.Get(location_catalog_entry_index); + size += DexRegisterLocationCatalog::EntrySize(dex_register_location); + } + return size; +} + +size_t StackMapStream::ComputeDexRegisterMapSize(const StackMapEntry& entry) const { + // Size of the map in bytes. + size_t size = DexRegisterMap::kFixedSize; + // Add the live bit mask for the Dex register liveness. + size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers); + // Compute the size of the set of live Dex register entries. + size_t number_of_live_dex_registers = 0; + for (size_t dex_register_number = 0; + dex_register_number < entry.num_dex_registers; + ++dex_register_number) { + if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { + ++number_of_live_dex_registers; + } + } + size_t map_entries_size_in_bits = + DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size()) + * number_of_live_dex_registers; + size_t map_entries_size_in_bytes = + RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; + size += map_entries_size_in_bytes; + return size; +} + +size_t StackMapStream::ComputeDexRegisterMapsSize() const { + size_t size = 0; + for (size_t i = 0; i < stack_maps_.Size(); ++i) { + StackMapEntry entry = stack_maps_.Get(i); + if (entry.same_dex_register_map_as_ == kNoSameDexMapFound) { + // Entries with the same dex map will have the same offset. + size += ComputeDexRegisterMapSize(entry); + } + } + return size; +} + +size_t StackMapStream::ComputeInlineInfoSize() const { + return inline_infos_.Size() * InlineInfo::SingleEntrySize() + // For encoding the depth. 
+ + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); +} + +void StackMapStream::FillIn(MemoryRegion region) { + DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry"; + DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn"; + + CodeInfo code_info(region); + DCHECK_EQ(region.size(), needed_size_); + code_info.SetOverallSize(region.size()); + + MemoryRegion dex_register_locations_region = region.Subregion( + dex_register_maps_start_, dex_register_maps_size_); + + MemoryRegion inline_infos_region = region.Subregion( + inline_infos_start_, inline_info_size_); + + code_info.SetEncoding(inline_info_size_, + dex_register_maps_size_, + dex_pc_max_, + native_pc_offset_max_, + register_mask_max_); + code_info.SetNumberOfStackMaps(stack_maps_.Size()); + code_info.SetStackMaskSize(stack_mask_size_); + DCHECK_EQ(code_info.GetStackMapsSize(), stack_maps_size_); + + // Set the Dex register location catalog. + code_info.SetNumberOfDexRegisterLocationCatalogEntries(location_catalog_entries_.Size()); + MemoryRegion dex_register_location_catalog_region = region.Subregion( + dex_register_location_catalog_start_, dex_register_location_catalog_size_); + DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); + // Offset in `dex_register_location_catalog` where to store the next + // register location. + size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; + for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) { + DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i); + dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); + location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); + } + // Ensure we reached the end of the Dex registers location_catalog. + DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size()); + + uintptr_t next_dex_register_map_offset = 0; + uintptr_t next_inline_info_offset = 0; + for (size_t i = 0, e = stack_maps_.Size(); i < e; ++i) { + StackMap stack_map = code_info.GetStackMapAt(i); + StackMapEntry entry = stack_maps_.Get(i); + + stack_map.SetDexPc(code_info, entry.dex_pc); + stack_map.SetNativePcOffset(code_info, entry.native_pc_offset); + stack_map.SetRegisterMask(code_info, entry.register_mask); + if (entry.sp_mask != nullptr) { + stack_map.SetStackMask(code_info, *entry.sp_mask); + } + + if (entry.num_dex_registers == 0) { + // No dex map available. + stack_map.SetDexRegisterMapOffset(code_info, StackMap::kNoDexRegisterMap); + } else { + // Search for an entry with the same dex map. + if (entry.same_dex_register_map_as_ != kNoSameDexMapFound) { + // If we have a hit reuse the offset. + stack_map.SetDexRegisterMapOffset(code_info, + code_info.GetStackMapAt(entry.same_dex_register_map_as_) + .GetDexRegisterMapOffset(code_info)); + } else { + // New dex registers maps should be added to the stack map. + MemoryRegion register_region = + dex_register_locations_region.Subregion( + next_dex_register_map_offset, + ComputeDexRegisterMapSize(entry)); + next_dex_register_map_offset += register_region.size(); + DexRegisterMap dex_register_map(register_region); + stack_map.SetDexRegisterMapOffset( + code_info, register_region.start() - dex_register_locations_region.start()); + + // Set the live bit mask. 
+ dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask); + + // Set the dex register location mapping data. + for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; + dex_register_number < entry.num_dex_registers; + ++dex_register_number) { + if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { + size_t location_catalog_entry_index = + dex_register_locations_.Get(entry.dex_register_locations_start_index + + index_in_dex_register_locations); + dex_register_map.SetLocationCatalogEntryIndex( + index_in_dex_register_locations, + location_catalog_entry_index, + entry.num_dex_registers, + location_catalog_entries_.Size()); + ++index_in_dex_register_locations; + } + } + } + } + + // Set the inlining info. + if (entry.inlining_depth != 0) { + MemoryRegion inline_region = inline_infos_region.Subregion( + next_inline_info_offset, + InlineInfo::kFixedSize + entry.inlining_depth * InlineInfo::SingleEntrySize()); + next_inline_info_offset += inline_region.size(); + InlineInfo inline_info(inline_region); + + // Currently relative to the dex register map. + stack_map.SetInlineDescriptorOffset( + code_info, inline_region.start() - dex_register_locations_region.start()); + + inline_info.SetDepth(entry.inlining_depth); + for (size_t j = 0; j < entry.inlining_depth; ++j) { + InlineInfoEntry inline_entry = inline_infos_.Get(j + entry.inline_infos_start_index); + inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index); + } + } else { + if (inline_info_size_ != 0) { + stack_map.SetInlineDescriptorOffset(code_info, StackMap::kNoInlineInfo); + } + } + } +} + +size_t StackMapStream::FindEntryWithTheSameDexMap() { + size_t current_entry_index = stack_maps_.Size(); + auto entries_it = dex_map_hash_to_stack_map_indices_.find(current_entry_.dex_register_map_hash); + if (entries_it == dex_map_hash_to_stack_map_indices_.end()) { + // We don't have a perfect hash functions so we need a list to collect all stack maps + // which might have the same dex register map. + GrowableArray<uint32_t> stack_map_indices(allocator_, 1); + stack_map_indices.Add(current_entry_index); + dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, stack_map_indices); + return kNoSameDexMapFound; + } + + // We might have collisions, so we need to check whether or not we really have a match. 
+ for (size_t i = 0; i < entries_it->second.Size(); i++) { + size_t test_entry_index = entries_it->second.Get(i); + if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), current_entry_)) { + return test_entry_index; + } + } + entries_it->second.Add(current_entry_index); + return kNoSameDexMapFound; +} + +bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const { + if (a.live_dex_registers_mask == nullptr && b.live_dex_registers_mask == nullptr) { + return true; + } + if (a.live_dex_registers_mask == nullptr || b.live_dex_registers_mask == nullptr) { + return false; + } + if (a.num_dex_registers != b.num_dex_registers) { + return false; + } + + int index_in_dex_register_locations = 0; + for (uint32_t i = 0; i < a.num_dex_registers; i++) { + if (a.live_dex_registers_mask->IsBitSet(i) != b.live_dex_registers_mask->IsBitSet(i)) { + return false; + } + if (a.live_dex_registers_mask->IsBitSet(i)) { + size_t a_loc = dex_register_locations_.Get( + a.dex_register_locations_start_index + index_in_dex_register_locations); + size_t b_loc = dex_register_locations_.Get( + b.dex_register_locations_start_index + index_in_dex_register_locations); + if (a_loc != b_loc) { + return false; + } + ++index_in_dex_register_locations; + } + } + return true; +} + +} // namespace art diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 9a9e068a9b..0c626be89f 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -70,13 +70,18 @@ class StackMapStream : public ValueObject { native_pc_offset_max_(0), register_mask_max_(0), number_of_stack_maps_with_inline_info_(0), - dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()) {} - - // Compute bytes needed to encode a mask with the given maximum element. - static uint32_t StackMaskEncodingSize(int max_element) { - int number_of_bits = max_element + 1; // Need room for max element too. - return RoundUp(number_of_bits, kBitsPerByte) / kBitsPerByte; - } + dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()), + current_entry_(), + stack_mask_size_(0), + inline_info_size_(0), + dex_register_maps_size_(0), + stack_maps_size_(0), + dex_register_location_catalog_size_(0), + dex_register_location_catalog_start_(0), + stack_maps_start_(0), + dex_register_maps_start_(0), + inline_infos_start_(0), + needed_size_(0) {} // See runtime/stack_map.h to know what these fields contain. 
struct StackMapEntry { @@ -90,380 +95,42 @@ class StackMapStream : public ValueObject { size_t inline_infos_start_index; BitVector* live_dex_registers_mask; uint32_t dex_register_map_hash; + size_t same_dex_register_map_as_; }; struct InlineInfoEntry { uint32_t method_index; }; - void AddStackMapEntry(uint32_t dex_pc, - uint32_t native_pc_offset, - uint32_t register_mask, - BitVector* sp_mask, - uint32_t num_dex_registers, - uint8_t inlining_depth) { - StackMapEntry entry; - entry.dex_pc = dex_pc; - entry.native_pc_offset = native_pc_offset; - entry.register_mask = register_mask; - entry.sp_mask = sp_mask; - entry.num_dex_registers = num_dex_registers; - entry.inlining_depth = inlining_depth; - entry.dex_register_locations_start_index = dex_register_locations_.Size(); - entry.inline_infos_start_index = inline_infos_.Size(); - entry.dex_register_map_hash = 0; - if (num_dex_registers != 0) { - entry.live_dex_registers_mask = - new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true); - } else { - entry.live_dex_registers_mask = nullptr; - } - stack_maps_.Add(entry); - - if (sp_mask != nullptr) { - stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet()); - } - if (inlining_depth > 0) { - number_of_stack_maps_with_inline_info_++; - } - - dex_pc_max_ = std::max(dex_pc_max_, dex_pc); - native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset); - register_mask_max_ = std::max(register_mask_max_, register_mask); - } - - void AddInlineInfoEntry(uint32_t method_index) { - InlineInfoEntry entry; - entry.method_index = method_index; - inline_infos_.Add(entry); - } - - size_t ComputeNeededSize() { - size_t size = CodeInfo::kFixedSize - + ComputeDexRegisterLocationCatalogSize() - + ComputeStackMapsSize() - + ComputeDexRegisterMapsSize() - + ComputeInlineInfoSize(); - // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned. - return size; - } - - size_t ComputeStackMaskSize() const { - return StackMaskEncodingSize(stack_mask_max_); - } - - size_t ComputeStackMapsSize() { - return stack_maps_.Size() * StackMap::ComputeStackMapSize( - ComputeStackMaskSize(), - ComputeInlineInfoSize(), - ComputeDexRegisterMapsSize(), - dex_pc_max_, - native_pc_offset_max_, - register_mask_max_); - } - - // Compute the size of the Dex register location catalog of `entry`. - size_t ComputeDexRegisterLocationCatalogSize() const { - size_t size = DexRegisterLocationCatalog::kFixedSize; - for (size_t location_catalog_entry_index = 0; - location_catalog_entry_index < location_catalog_entries_.Size(); - ++location_catalog_entry_index) { - DexRegisterLocation dex_register_location = - location_catalog_entries_.Get(location_catalog_entry_index); - size += DexRegisterLocationCatalog::EntrySize(dex_register_location); - } - return size; - } - - size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const { - // Size of the map in bytes. - size_t size = DexRegisterMap::kFixedSize; - // Add the live bit mask for the Dex register liveness. - size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers); - // Compute the size of the set of live Dex register entries. 
- size_t number_of_live_dex_registers = 0; - for (size_t dex_register_number = 0; - dex_register_number < entry.num_dex_registers; - ++dex_register_number) { - if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { - ++number_of_live_dex_registers; - } - } - size_t map_entries_size_in_bits = - DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size()) - * number_of_live_dex_registers; - size_t map_entries_size_in_bytes = - RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; - size += map_entries_size_in_bytes; - return size; - } - - // Compute the size of all the Dex register maps. - size_t ComputeDexRegisterMapsSize() { - size_t size = 0; - for (size_t i = 0; i < stack_maps_.Size(); ++i) { - if (FindEntryWithTheSameDexMap(i) == kNoSameDexMapFound) { - // Entries with the same dex map will have the same offset. - size += ComputeDexRegisterMapSize(stack_maps_.Get(i)); - } - } - return size; - } - - // Compute the size of all the inline information pieces. - size_t ComputeInlineInfoSize() const { - return inline_infos_.Size() * InlineInfo::SingleEntrySize() - // For encoding the depth. - + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); - } + void BeginStackMapEntry(uint32_t dex_pc, + uint32_t native_pc_offset, + uint32_t register_mask, + BitVector* sp_mask, + uint32_t num_dex_registers, + uint8_t inlining_depth); + void EndStackMapEntry(); - size_t ComputeDexRegisterLocationCatalogStart() const { - return CodeInfo::kFixedSize; - } - - size_t ComputeStackMapsStart() const { - return ComputeDexRegisterLocationCatalogStart() + ComputeDexRegisterLocationCatalogSize(); - } - - size_t ComputeDexRegisterMapsStart() { - return ComputeStackMapsStart() + ComputeStackMapsSize(); - } - - size_t ComputeInlineInfoStart() { - return ComputeDexRegisterMapsStart() + ComputeDexRegisterMapsSize(); - } + void AddDexRegisterEntry(uint16_t dex_register, + DexRegisterLocation::Kind kind, + int32_t value); - void FillIn(MemoryRegion region) { - CodeInfo code_info(region); - DCHECK_EQ(region.size(), ComputeNeededSize()); - code_info.SetOverallSize(region.size()); + void AddInlineInfoEntry(uint32_t method_index); - size_t stack_mask_size = ComputeStackMaskSize(); - - size_t dex_register_map_size = ComputeDexRegisterMapsSize(); - size_t inline_info_size = ComputeInlineInfoSize(); - - MemoryRegion dex_register_locations_region = region.Subregion( - ComputeDexRegisterMapsStart(), - dex_register_map_size); - - MemoryRegion inline_infos_region = region.Subregion( - ComputeInlineInfoStart(), - inline_info_size); - - code_info.SetEncoding(inline_info_size, - dex_register_map_size, - dex_pc_max_, - native_pc_offset_max_, - register_mask_max_); - code_info.SetNumberOfStackMaps(stack_maps_.Size()); - code_info.SetStackMaskSize(stack_mask_size); - DCHECK_EQ(code_info.GetStackMapsSize(), ComputeStackMapsSize()); - - // Set the Dex register location catalog. - code_info.SetNumberOfDexRegisterLocationCatalogEntries( - location_catalog_entries_.Size()); - MemoryRegion dex_register_location_catalog_region = region.Subregion( - ComputeDexRegisterLocationCatalogStart(), - ComputeDexRegisterLocationCatalogSize()); - DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); - // Offset in `dex_register_location_catalog` where to store the next - // register location. 
- size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; - for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) { - DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i); - dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); - location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); - } - // Ensure we reached the end of the Dex registers location_catalog. - DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size()); - - uintptr_t next_dex_register_map_offset = 0; - uintptr_t next_inline_info_offset = 0; - for (size_t i = 0, e = stack_maps_.Size(); i < e; ++i) { - StackMap stack_map = code_info.GetStackMapAt(i); - StackMapEntry entry = stack_maps_.Get(i); - - stack_map.SetDexPc(code_info, entry.dex_pc); - stack_map.SetNativePcOffset(code_info, entry.native_pc_offset); - stack_map.SetRegisterMask(code_info, entry.register_mask); - if (entry.sp_mask != nullptr) { - stack_map.SetStackMask(code_info, *entry.sp_mask); - } - - if (entry.num_dex_registers == 0) { - // No dex map available. - stack_map.SetDexRegisterMapOffset(code_info, StackMap::kNoDexRegisterMap); - } else { - // Search for an entry with the same dex map. - size_t entry_with_same_map = FindEntryWithTheSameDexMap(i); - if (entry_with_same_map != kNoSameDexMapFound) { - // If we have a hit reuse the offset. - stack_map.SetDexRegisterMapOffset(code_info, - code_info.GetStackMapAt(entry_with_same_map).GetDexRegisterMapOffset(code_info)); - } else { - // New dex registers maps should be added to the stack map. - MemoryRegion register_region = - dex_register_locations_region.Subregion( - next_dex_register_map_offset, - ComputeDexRegisterMapSize(entry)); - next_dex_register_map_offset += register_region.size(); - DexRegisterMap dex_register_map(register_region); - stack_map.SetDexRegisterMapOffset( - code_info, register_region.start() - dex_register_locations_region.start()); - - // Set the live bit mask. - dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask); - - // Set the dex register location mapping data. - for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; - dex_register_number < entry.num_dex_registers; - ++dex_register_number) { - if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { - size_t location_catalog_entry_index = - dex_register_locations_.Get(entry.dex_register_locations_start_index - + index_in_dex_register_locations); - dex_register_map.SetLocationCatalogEntryIndex( - index_in_dex_register_locations, - location_catalog_entry_index, - entry.num_dex_registers, - location_catalog_entries_.Size()); - ++index_in_dex_register_locations; - } - } - } - } - - // Set the inlining info. - if (entry.inlining_depth != 0) { - MemoryRegion inline_region = inline_infos_region.Subregion( - next_inline_info_offset, - InlineInfo::kFixedSize + entry.inlining_depth * InlineInfo::SingleEntrySize()); - next_inline_info_offset += inline_region.size(); - InlineInfo inline_info(inline_region); - - // Currently relative to the dex register map. 
- stack_map.SetInlineDescriptorOffset( - code_info, inline_region.start() - dex_register_locations_region.start()); - - inline_info.SetDepth(entry.inlining_depth); - for (size_t j = 0; j < entry.inlining_depth; ++j) { - InlineInfoEntry inline_entry = inline_infos_.Get(j + entry.inline_infos_start_index); - inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index); - } - } else { - if (inline_info_size != 0) { - stack_map.SetInlineDescriptorOffset(code_info, StackMap::kNoInlineInfo); - } - } - } - } - - void AddDexRegisterEntry(uint16_t dex_register, DexRegisterLocation::Kind kind, int32_t value) { - StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1); - DCHECK_LT(dex_register, entry.num_dex_registers); - - if (kind != DexRegisterLocation::Kind::kNone) { - // Ensure we only use non-compressed location kind at this stage. - DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) - << DexRegisterLocation::PrettyDescriptor(kind); - DexRegisterLocation location(kind, value); - - // Look for Dex register `location` in the location catalog (using the - // companion hash map of locations to indices). Use its index if it - // is already in the location catalog. If not, insert it (in the - // location catalog and the hash map) and use the newly created index. - auto it = location_catalog_entries_indices_.Find(location); - if (it != location_catalog_entries_indices_.end()) { - // Retrieve the index from the hash map. - dex_register_locations_.Add(it->second); - } else { - // Create a new entry in the location catalog and the hash map. - size_t index = location_catalog_entries_.Size(); - location_catalog_entries_.Add(location); - dex_register_locations_.Add(index); - location_catalog_entries_indices_.Insert(std::make_pair(location, index)); - } - - entry.live_dex_registers_mask->SetBit(dex_register); - entry.dex_register_map_hash += - (1 << (dex_register % (sizeof(entry.dex_register_map_hash) * kBitsPerByte))); - entry.dex_register_map_hash += static_cast<uint32_t>(value); - entry.dex_register_map_hash += static_cast<uint32_t>(kind); - stack_maps_.Put(stack_maps_.Size() - 1, entry); - } - } + // Prepares the stream to fill in a memory region. Must be called before FillIn. + // Returns the size (in bytes) needed to store this stream. + size_t PrepareForFillIn(); + void FillIn(MemoryRegion region); private: - // Returns the index of an entry with the same dex register map - // or kNoSameDexMapFound if no such entry exists. - size_t FindEntryWithTheSameDexMap(size_t entry_index) { - StackMapEntry entry = stack_maps_.Get(entry_index); - auto entries_it = dex_map_hash_to_stack_map_indices_.find(entry.dex_register_map_hash); - if (entries_it == dex_map_hash_to_stack_map_indices_.end()) { - // We don't have a perfect hash functions so we need a list to collect all stack maps - // which might have the same dex register map. - GrowableArray<uint32_t> stack_map_indices(allocator_, 1); - stack_map_indices.Add(entry_index); - dex_map_hash_to_stack_map_indices_.Put(entry.dex_register_map_hash, stack_map_indices); - return kNoSameDexMapFound; - } - - // TODO: We don't need to add ourselves to the map if we can guarantee that - // FindEntryWithTheSameDexMap is called just once per stack map entry. - // A good way to do this is to cache the offset in the stack map entry. This - // is easier to do if we add markers when the stack map constructions begins - // and when it ends. 
+ size_t ComputeDexRegisterLocationCatalogSize() const; + size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const; + size_t ComputeDexRegisterMapsSize() const; + size_t ComputeInlineInfoSize() const; - // We might have collisions, so we need to check whether or not we should - // add the entry to the map. `needs_to_be_added` keeps track of this. - bool needs_to_be_added = true; - size_t result = kNoSameDexMapFound; - for (size_t i = 0; i < entries_it->second.Size(); i++) { - size_t test_entry_index = entries_it->second.Get(i); - if (test_entry_index == entry_index) { - needs_to_be_added = false; - } else if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), entry)) { - result = test_entry_index; - needs_to_be_added = false; - break; - } - } - if (needs_to_be_added) { - entries_it->second.Add(entry_index); - } - return result; - } - - bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const { - if (a.live_dex_registers_mask == nullptr && b.live_dex_registers_mask == nullptr) { - return true; - } - if (a.live_dex_registers_mask == nullptr || b.live_dex_registers_mask == nullptr) { - return false; - } - if (a.num_dex_registers != b.num_dex_registers) { - return false; - } - - int index_in_dex_register_locations = 0; - for (uint32_t i = 0; i < a.num_dex_registers; i++) { - if (a.live_dex_registers_mask->IsBitSet(i) != b.live_dex_registers_mask->IsBitSet(i)) { - return false; - } - if (a.live_dex_registers_mask->IsBitSet(i)) { - size_t a_loc = dex_register_locations_.Get( - a.dex_register_locations_start_index + index_in_dex_register_locations); - size_t b_loc = dex_register_locations_.Get( - b.dex_register_locations_start_index + index_in_dex_register_locations); - if (a_loc != b_loc) { - return false; - } - ++index_in_dex_register_locations; - } - } - return true; - } + // Returns the index of an entry with the same dex register map as the current_entry, + // or kNoSameDexMapFound if no such entry exists. + size_t FindEntryWithTheSameDexMap(); + bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const; ArenaAllocator* allocator_; GrowableArray<StackMapEntry> stack_maps_; @@ -476,8 +143,7 @@ class StackMapStream : public ValueObject { DexRegisterLocationHashFn> LocationCatalogEntriesIndices; LocationCatalogEntriesIndices location_catalog_entries_indices_; - // A set of concatenated maps of Dex register locations indices to - // `location_catalog_entries_`. + // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`. 
GrowableArray<size_t> dex_register_locations_; GrowableArray<InlineInfoEntry> inline_infos_; int stack_mask_max_; @@ -488,6 +154,18 @@ class StackMapStream : public ValueObject { ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_; + StackMapEntry current_entry_; + size_t stack_mask_size_; + size_t inline_info_size_; + size_t dex_register_maps_size_; + size_t stack_maps_size_; + size_t dex_register_location_catalog_size_; + size_t dex_register_location_catalog_start_; + size_t stack_maps_start_; + size_t dex_register_maps_start_; + size_t inline_infos_start_; + size_t needed_size_; + static constexpr uint32_t kNoSameDexMapFound = -1; DISALLOW_COPY_AND_ASSIGN(StackMapStream); diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 8d160bc81e..3291a77021 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -40,11 +40,12 @@ TEST(StackMapTest, Test1) { ArenaBitVector sp_mask(&arena, 0, false); size_t number_of_dex_registers = 2; - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location. stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Short location. + stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -123,20 +124,22 @@ TEST(StackMapTest, Test2) { sp_mask1.SetBit(2); sp_mask1.SetBit(4); size_t number_of_dex_registers = 2; - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2); stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location. stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. stream.AddInlineInfoEntry(42); stream.AddInlineInfoEntry(82); + stream.EndStackMapEntry(); ArenaBitVector sp_mask2(&arena, 0, true); sp_mask2.SetBit(3); sp_mask1.SetBit(8); - stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); + stream.BeginStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); stream.AddDexRegisterEntry(0, Kind::kInRegister, 18); // Short location. stream.AddDexRegisterEntry(1, Kind::kInFpuRegister, 3); // Short location. + stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -273,11 +276,12 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 2; - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.AddDexRegisterEntry(0, Kind::kNone, 0); // No location. stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. 
+ stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -353,7 +357,7 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 1024; // Create the first stack map (and its Dex register map). - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8; for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) { // Use two different Dex register locations to populate this map, @@ -362,13 +366,15 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { // art::DexRegisterMap::SingleEntrySizeInBits). stream.AddDexRegisterEntry(i, Kind::kConstant, i % 2); // Short location. } + stream.EndStackMapEntry(); // Create the second stack map (and its Dex register map). - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); for (uint32_t i = 0; i < number_of_dex_registers; ++i) { stream.AddDexRegisterEntry(i, Kind::kConstant, 0); // Short location. } + stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -413,19 +419,22 @@ TEST(StackMapTest, TestShareDexRegisterMap) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 2; // First stack map. - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location. stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. + stream.EndStackMapEntry(); // Second stack map, which should share the same dex register map. - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location. stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. + stream.EndStackMapEntry(); // Third stack map (doesn't share the dex register map). - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.AddDexRegisterEntry(0, Kind::kInRegister, 2); // Short location. stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. 
+ stream.EndStackMapEntry();
- size_t size = stream.ComputeNeededSize();
+ size_t size = stream.PrepareForFillIn();
  void* memory = arena.Alloc(size, kArenaAllocMisc);
  MemoryRegion region(memory, size);
  stream.FillIn(region);
@@ -462,9 +471,10 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
  ArenaBitVector sp_mask(&arena, 0, false);
  uint32_t number_of_dex_registers = 0;
- stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.EndStackMapEntry();
- size_t size = stream.ComputeNeededSize();
+ size_t size = stream.PrepareForFillIn();
  void* memory = arena.Alloc(size, kArenaAllocMisc);
  MemoryRegion region(memory, size);
  stream.FillIn(region);
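With this change, StackMapStream is driven in two phases: every stack map is recorded between a BeginStackMapEntry()/EndStackMapEntry() pair, and only afterwards does PrepareForFillIn() size the encoding so that FillIn() can serialize it. A minimal sketch of that calling sequence, modelled on the updated stack_map_test.cc hunks above; the include and the StackMapStream/arena construction are assumptions based on the surrounding test code, not lines taken from this diff:

#include "stack_map_stream.h"  // assumed to pull in the arena and MemoryRegion types used below

static void BuildExampleCodeInfo(ArenaAllocator* arena) {
  StackMapStream stream(arena);  // constructor signature assumed from the header above

  ArenaBitVector sp_mask(arena, 0, false);
  size_t number_of_dex_registers = 2;

  // Phase 1: describe each safepoint between matching Begin/End calls.
  stream.BeginStackMapEntry(/* dex_pc */ 0,
                            /* native_pc_offset */ 64,
                            /* register_mask */ 0x3,
                            &sp_mask,
                            number_of_dex_registers,
                            /* inlining_depth */ 0);
  stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0);
  stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
  stream.EndStackMapEntry();

  // Phase 2: size the encoding once, then serialize into a region of exactly that size.
  size_t size = stream.PrepareForFillIn();
  void* memory = arena->Alloc(size, kArenaAllocMisc);
  MemoryRegion region(memory, size);
  stream.FillIn(region);
}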
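PrepareForFillIn() also fixes the layout of the final CodeInfo region and caches the section offsets that FillIn() later relies on. The sections are laid out back to back in this order (a worked example with purely illustrative sizes; the real values depend on the number of stack maps, live registers and inline frames):

  CodeInfo::kFixedSize                    -> dex_register_location_catalog_start_
  + dex_register_location_catalog_size_   -> stack_maps_start_
  + stack_maps_size_                      -> dex_register_maps_start_
  + dex_register_maps_size_               -> inline_infos_start_
  + inline_info_size_                     =  needed_size_

  For example, with kFixedSize = 16, a catalog of 8 bytes, 40 bytes of stack maps,
  24 bytes of Dex register maps and no inline info, the cached starts would be
  16, 24, 64 and 88, with needed_size_ = 88.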
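The Dex register map sharing is the subtle part of the new protocol: FindEntryWithTheSameDexMap() buckets entries by dex_register_map_hash and HaveTheSameDexMaps() decides whether a candidate really matches, so identical maps are encoded once and later entries simply reuse the offset. A sketch of how that is observable from the reader side, assuming the three-entry setup of TestShareDexRegisterMap above (the assertions here are illustrative, not copied from the test):

  // Inside a test body, after FillIn() has populated `region` with the three
  // stack maps built in TestShareDexRegisterMap.
  CodeInfo code_info(region);
  StackMap map0 = code_info.GetStackMapAt(0);
  StackMap map1 = code_info.GetStackMapAt(1);
  StackMap map2 = code_info.GetStackMapAt(2);
  // Entries 0 and 1 were built with identical locations, so they share one map.
  ASSERT_EQ(map0.GetDexRegisterMapOffset(code_info),
            map1.GetDexRegisterMapOffset(code_info));
  // Entry 2 used a different location for register 0 and gets its own map.
  ASSERT_NE(map0.GetDexRegisterMapOffset(code_info),
            map2.GetDexRegisterMapOffset(code_info));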