Diffstat (limited to 'compiler/optimizing')
35 files changed, 1822 insertions, 455 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index e9fcfe2bed..1fc247faf1 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -847,7 +847,7 @@ class BCEVisitor : public HGraphVisitor { } // Try index range obtained by induction variable analysis. // Disables dynamic bce if OOB is certain. - if (InductionRangeFitsIn(&array_range, bounds_check, index, &try_dynamic_bce)) { + if (InductionRangeFitsIn(&array_range, bounds_check, &try_dynamic_bce)) { ReplaceInstruction(bounds_check, index); return; } @@ -912,9 +912,9 @@ class BCEVisitor : public HGraphVisitor { static bool HasSameInputAtBackEdges(HPhi* phi) { DCHECK(phi->IsLoopHeaderPhi()); - auto&& inputs = phi->GetInputs(); + HConstInputsRef inputs = phi->GetInputs(); // Start with input 1. Input 0 is from the incoming block. - HInstruction* input1 = inputs[1]; + const HInstruction* input1 = inputs[1]; DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge( *phi->GetBlock()->GetPredecessors()[1])); for (size_t i = 2; i < inputs.size(); ++i) { @@ -1299,33 +1299,30 @@ class BCEVisitor : public HGraphVisitor { * parameter try_dynamic_bce is set to false if OOB is certain. */ bool InductionRangeFitsIn(ValueRange* array_range, - HInstruction* context, - HInstruction* index, + HBoundsCheck* context, bool* try_dynamic_bce) { InductionVarRange::Value v1; InductionVarRange::Value v2; bool needs_finite_test = false; - if (induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test)) { - do { - if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) && - v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) { - DCHECK(v1.a_constant == 1 || v1.instruction == nullptr); - DCHECK(v2.a_constant == 1 || v2.instruction == nullptr); - ValueRange index_range(GetGraph()->GetArena(), - ValueBound(v1.instruction, v1.b_constant), - ValueBound(v2.instruction, v2.b_constant)); - // If analysis reveals a certain OOB, disable dynamic BCE. - if (index_range.GetLower().LessThan(array_range->GetLower()) || - index_range.GetUpper().GreaterThan(array_range->GetUpper())) { - *try_dynamic_bce = false; - return false; - } - // Use analysis for static bce only if loop is finite. - if (!needs_finite_test && index_range.FitsIn(array_range)) { - return true; - } + HInstruction* index = context->InputAt(0); + HInstruction* hint = ValueBound::HuntForDeclaration(context->InputAt(1)); + if (induction_range_.GetInductionRange(context, index, hint, &v1, &v2, &needs_finite_test)) { + if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) && + v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) { + DCHECK(v1.a_constant == 1 || v1.instruction == nullptr); + DCHECK(v2.a_constant == 1 || v2.instruction == nullptr); + ValueRange index_range(GetGraph()->GetArena(), + ValueBound(v1.instruction, v1.b_constant), + ValueBound(v2.instruction, v2.b_constant)); + // If analysis reveals a certain OOB, disable dynamic BCE. Otherwise, + // use analysis for static bce only if loop is finite. 
+ if (index_range.GetLower().LessThan(array_range->GetLower()) || + index_range.GetUpper().GreaterThan(array_range->GetUpper())) { + *try_dynamic_bce = false; + } else if (!needs_finite_test && index_range.FitsIn(array_range)) { + return true; } - } while (induction_range_.RefineOuter(&v1, &v2)); + } } return false; } diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 12aa15207c..4520f9b3e3 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -111,7 +111,7 @@ static bool CheckTypeConsistency(HInstruction* instruction) { << " " << locations->Out(); } - auto&& inputs = instruction->GetInputs(); + HConstInputsRef inputs = instruction->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { DCHECK(CheckType(inputs[i]->GetType(), locations->InAt(i))) << inputs[i]->GetType() << " " << locations->InAt(i); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 663c68a17b..690ecc3429 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -2292,8 +2292,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimFloat: { // Processing a Dex `float-to-int' instruction. SRegister temp = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>(); - __ vmovs(temp, in.AsFpuRegister<SRegister>()); - __ vcvtis(temp, temp); + __ vcvtis(temp, in.AsFpuRegister<SRegister>()); __ vmovrs(out.AsRegister<Register>(), temp); break; } @@ -2301,9 +2300,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimDouble: { // Processing a Dex `double-to-int' instruction. SRegister temp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>(); - DRegister temp_d = FromLowSToD(temp_s); - __ vmovd(temp_d, FromLowSToD(in.AsFpuRegisterPairLow<SRegister>())); - __ vcvtid(temp_s, temp_d); + __ vcvtid(temp_s, FromLowSToD(in.AsFpuRegisterPairLow<SRegister>())); __ vmovrs(out.AsRegister<Register>(), temp_s); break; } diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 810db20888..b6dca95354 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -39,6 +39,10 @@ namespace mips { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = A0; +// We'll maximize the range of a single load instruction for dex cache array accesses +// by aligning offset -32768 with the offset of the first used element. +static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000; + Location MipsReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: @@ -477,7 +481,12 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), assembler_(graph->GetArena(), &isa_features), - isa_features_(isa_features) { + isa_features_(isa_features), + method_patches_(MethodReferenceComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + call_patches_(MethodReferenceComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Save RA (containing the return address) to mimic Quick. 
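The kDexCacheArrayLwOffset constant introduced at the top of the MIPS changes is what lets a single lw cover a full 64KB window of the dex cache arrays: the base register is patched to point 0x8000 bytes past the first used element, so lw's signed 16-bit immediate, which spans [-32768, 32767], reaches every element in [first, first + 64KB). A minimal sketch of the offset arithmetic; the helper name LwOffsetFor is mine, not from the patch:

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;  // same constant as in the patch

    // Offset for "lw reg, offset(base)" when 'base' has been patched to point at
    // first_used_element_address + kDexCacheArrayLwOffset.
    int32_t LwOffsetFor(uint32_t element_offset, uint32_t base_element_offset) {
      int32_t offset = static_cast<int32_t>(element_offset - base_element_offset) -
                       static_cast<int32_t>(kDexCacheArrayLwOffset);
      assert(-32768 <= offset && offset <= 32767);  // must fit lw's 16-bit immediate
      return offset;
    }

This mirrors the invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset computation in GenerateStaticOrDirectCall further down.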
AddAllocatedRegister(Location::RegisterLocation(RA)); } @@ -948,6 +957,71 @@ void CodeGeneratorMIPS::AddLocationAsTemp(Location location, LocationSummary* lo } } +void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { + DCHECK(linker_patches->empty()); + size_t size = + method_patches_.size() + + call_patches_.size() + + pc_relative_dex_cache_patches_.size(); + linker_patches->reserve(size); + for (const auto& entry : method_patches_) { + const MethodReference& target_method = entry.first; + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, + target_method.dex_file, + target_method.dex_method_index)); + } + for (const auto& entry : call_patches_) { + const MethodReference& target_method = entry.first; + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::CodePatch(literal_offset, + target_method.dex_file, + target_method.dex_method_index)); + } + for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { + const DexFile& dex_file = info.target_dex_file; + size_t base_element_offset = info.offset_or_index; + DCHECK(info.high_label.IsBound()); + uint32_t high_offset = __ GetLabelLocation(&info.high_label); + DCHECK(info.pc_rel_label.IsBound()); + uint32_t pc_rel_offset = __ GetLabelLocation(&info.pc_rel_label); + linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(high_offset, + &dex_file, + pc_rel_offset, + base_element_offset)); + } +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch( + const DexFile& dex_file, uint32_t element_offset) { + return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch( + const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { + patches->emplace_back(dex_file, offset_or_index); + return &patches->back(); +} + +Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method, + MethodToLiteralMap* map) { + return map->GetOrCreate( + target_method, + [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); +} + +Literal* CodeGeneratorMIPS::DeduplicateMethodAddressLiteral(MethodReference target_method) { + return DeduplicateMethodLiteral(target_method, &method_patches_); +} + +Literal* CodeGeneratorMIPS::DeduplicateMethodCodeLiteral(MethodReference target_method) { + return DeduplicateMethodLiteral(target_method, &call_patches_); +} + void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) { MipsLabel done; Register card = AT; @@ -3743,12 +3817,38 @@ void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invo // art::PrepareForRegisterAllocation. DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); + HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + + // kDirectAddressWithFixup and kCallDirectWithFixup need no extra input on R6 because + // R6 has PC-relative addressing. 
+ bool has_extra_input = !isR6 && + ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) || + (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup)); + + if (invoke->HasPcRelativeDexCache()) { + // kDexCachePcRelative is mutually exclusive with + // kDirectAddressWithFixup/kCallDirectWithFixup. + CHECK(!has_extra_input); + has_extra_input = true; + } + IntrinsicLocationsBuilderMIPS intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { + if (invoke->GetLocations()->CanCall() && has_extra_input) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); + } return; } HandleInvoke(invoke); + + // Add the extra input register if either the dex cache array base register + // or the PC-relative base register for accessing literals is needed. + if (has_extra_input) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); + } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen) { @@ -3773,42 +3873,103 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( return HLoadClass::LoadKind::kDexCacheViaMethod; } +Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, + Register temp) { + CHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); + if (!invoke->GetLocations()->Intrinsified()) { + return location.AsRegister<Register>(); + } + // For intrinsics we allow any location, so it may be on the stack. + if (!location.IsRegister()) { + __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex()); + return temp; + } + // For register locations, check if the register was saved. If so, get it from the stack. + // Note: There is a chance that the register was saved but not overwritten, so we could + // save one load. However, since this is just an intrinsic slow path we prefer this + // simple and more robust approach rather than trying to determine if that's the case. + SlowPathCode* slow_path = GetCurrentSlowPath(); + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. + if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { + int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); + __ LoadFromOffset(kLoadWord, temp, SP, stack_offset); + return temp; + } + return location.AsRegister<Register>(); +} + HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method ATTRIBUTE_UNUSED) { - switch (desired_dispatch_info.method_load_kind) { + HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info; + // We disable PC-relative load when there is an irreducible loop, as the optimization + // is incompatible with it. + bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); + bool fallback_load = true; + bool fallback_call = true; + switch (dispatch_info.method_load_kind) { case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - // TODO: Implement these types. For the moment, we fall back to kDexCacheViaMethod. 
- return HInvokeStaticOrDirect::DispatchInfo { - HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u, - 0u - }; + fallback_load = has_irreducible_loops; + break; default: + fallback_load = false; break; } - switch (desired_dispatch_info.code_ptr_location) { + switch (dispatch_info.code_ptr_location) { case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: + fallback_call = has_irreducible_loops; + break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: - // TODO: Implement these types. For the moment, we fall back to kCallArtMethod. - return HInvokeStaticOrDirect::DispatchInfo { - desired_dispatch_info.method_load_kind, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - desired_dispatch_info.method_load_data, - 0u - }; + // TODO: Implement this type. + break; default: - return desired_dispatch_info; + fallback_call = false; + break; } + if (fallback_load) { + dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; + dispatch_info.method_load_data = 0; + } + if (fallback_call) { + dispatch_info.code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + dispatch_info.direct_code_ptr = 0; + } + return dispatch_info; } void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { // All registers are assumed to be correctly set up per the calling convention. - Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. - switch (invoke->GetMethodLoadKind()) { + HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); + HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); + bool isR6 = isa_features_.IsR6(); + // kDirectAddressWithFixup and kCallDirectWithFixup have no extra input on R6 because + // R6 has PC-relative addressing. + bool has_extra_input = invoke->HasPcRelativeDexCache() || + (!isR6 && + ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) || + (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup))); + Register base_reg = has_extra_input + ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()) + : ZERO; + + // For better instruction scheduling we load the direct code pointer before the method pointer. + switch (code_ptr_location) { + case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: + // T9 = invoke->GetDirectCodePtr(); + __ LoadConst32(T9, invoke->GetDirectCodePtr()); + break; + case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: + // T9 = code address from literal pool with link-time patch. + __ LoadLiteral(T9, base_reg, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod())); + break; + default: + break; + } + + switch (method_load_kind) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: // temp = thread->string_init_entrypoint __ LoadFromOffset(kLoadWord, @@ -3823,11 +3984,18 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - // TODO: Implement these types. - // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch(). 
- LOG(FATAL) << "Unsupported"; - UNREACHABLE(); + __ LoadLiteral(temp.AsRegister<Register>(), + base_reg, + DeduplicateMethodAddressLiteral(invoke->GetTargetMethod())); + break; + case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { + HMipsDexCacheArraysBase* base = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase(); + int32_t offset = + invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset; + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register reg = temp.AsRegister<Register>(); @@ -3858,20 +4026,19 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke } } - switch (invoke->GetCodePtrLocation()) { + switch (code_ptr_location) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Jalr(&frame_entry_label_, T9); + __ Bal(&frame_entry_label_); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // LR = invoke->GetDirectCodePtr(); - __ LoadConst32(T9, invoke->GetDirectCodePtr()); - // LR() + case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: + // T9 prepared above for better instruction scheduling. + // T9() __ Jalr(T9); __ Nop(); break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: - // TODO: Implement these types. + // TODO: Implement this type. // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch(). LOG(FATAL) << "Unsupported"; UNREACHABLE(); @@ -5142,6 +5309,57 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr } } +void LocationsBuilderMIPS::VisitMipsComputeBaseMethodAddress( + HMipsComputeBaseMethodAddress* insn) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorMIPS::VisitMipsComputeBaseMethodAddress( + HMipsComputeBaseMethodAddress* insn) { + LocationSummary* locations = insn->GetLocations(); + Register reg = locations->Out().AsRegister<Register>(); + + CHECK(!codegen_->GetInstructionSetFeatures().IsR6()); + + // Generate a dummy PC-relative call to obtain PC. + __ Nal(); + // Grab the return address off RA. + __ Move(reg, RA); + + // Remember this offset (the obtained PC value) for later use with constant area. + __ BindPcRelBaseLabel(); +} + +void LocationsBuilderMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) { + Register reg = base->GetLocations()->Out().AsRegister<Register>(); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset()); + + if (codegen_->GetInstructionSetFeatures().IsR6()) { + __ Bind(&info->high_label); + __ Bind(&info->pc_rel_label); + // Add a 32-bit offset to PC. + __ Auipc(reg, /* placeholder */ 0x1234); + __ Addiu(reg, reg, /* placeholder */ 0x5678); + } else { + // Generate a dummy PC-relative call to obtain PC. 
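The nal trick used in the sequence that follows deserves a note (my reading, not text from the patch): nal is the bltzal-with-$zero idiom, so the branch is never taken but RA is always written with PC + 8, the address of the instruction after the delay slot. With the Lui sitting in the delay slot, RA ends up holding the address of the Ori, which is exactly where pc_rel_label is bound, so the linker can patch the lui/ori pair with (target - address_of_ori):

    nal                   # RA <- PC + 8, i.e. the address of the ori below
    lui  reg, hi16        # delay slot; hi16/lo16 patched at link time
    ori  reg, reg, lo16   # reg <- (target - address_of_ori)
    addu reg, reg, RA     # reg <- target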
+ __ Nal(); + __ Bind(&info->high_label); + __ Lui(reg, /* placeholder */ 0x1234); + __ Bind(&info->pc_rel_label); + __ Ori(reg, reg, /* placeholder */ 0x5678); + // Add a 32-bit offset to PC. + __ Addu(reg, reg, RA); + } +} + void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { // The trampoline uses the same calling convention as dex calling conventions, // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 6487f28ad5..08f74c04d1 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -285,6 +285,9 @@ class CodeGeneratorMIPS : public CodeGenerator { MipsAssembler* GetAssembler() OVERRIDE { return &assembler_; } const MipsAssembler& GetAssembler() const OVERRIDE { return assembler_; } + // Emit linker patches. + void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + void MarkGCCard(Register object, Register value); // Register allocation. @@ -372,7 +375,39 @@ class CodeGeneratorMIPS : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays + // and boot image strings. The only difference is the interpretation of the offset_or_index. + struct PcRelativePatchInfo { + PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx) + : target_dex_file(dex_file), offset_or_index(off_or_idx) { } + PcRelativePatchInfo(PcRelativePatchInfo&& other) = default; + + const DexFile& target_dex_file; + // Either the dex cache array element offset or the string index. + uint32_t offset_or_index; + // Label for the instruction loading the most significant half of the offset that's added to PC + // to form the base address (the least significant half is loaded with the instruction that + // follows). + MipsLabel high_label; + // Label for the instruction corresponding to PC+0. + MipsLabel pc_rel_label; + }; + + PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, + uint32_t element_offset); + private: + Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); + + using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; + + Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); + Literal* DeduplicateMethodAddressLiteral(MethodReference target_method); + Literal* DeduplicateMethodCodeLiteral(MethodReference target_method); + PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, + uint32_t offset_or_index, + ArenaDeque<PcRelativePatchInfo>* patches); + // Labels for each block that will be compiled. MipsLabel* block_labels_; MipsLabel frame_entry_label_; @@ -382,6 +417,12 @@ class CodeGeneratorMIPS : public CodeGenerator { MipsAssembler assembler_; const MipsInstructionSetFeatures& isa_features_; + // Method patch info, map MethodReference to a literal for method address and method code. + MethodToLiteralMap method_patches_; + MethodToLiteralMap call_patches_; + // PC-relative patch info for each HMipsDexCacheArraysBase. 
+ ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS); }; diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc new file mode 100644 index 0000000000..0f42d9ce0f --- /dev/null +++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dex_cache_array_fixups_mips.h" + +#include "base/arena_containers.h" +#include "utils/dex_cache_arrays_layout-inl.h" + +namespace art { +namespace mips { + +/** + * Finds instructions that need the dex cache arrays base as an input. + */ +class DexCacheArrayFixupsVisitor : public HGraphVisitor { + public: + explicit DexCacheArrayFixupsVisitor(HGraph* graph) + : HGraphVisitor(graph), + dex_cache_array_bases_(std::less<const DexFile*>(), + // Attribute memory use to code generator. + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} + + void MoveBasesIfNeeded() { + for (const auto& entry : dex_cache_array_bases_) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + HMipsDexCacheArraysBase* base = entry.second; + base->MoveBeforeFirstUserAndOutOfLoops(); + } + } + + private: + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + // If this is an invoke with PC-relative access to the dex cache methods array, + // we need to add the dex cache arrays base as the special input. + if (invoke->HasPcRelativeDexCache()) { + // Initialize base for target method dex file if needed. + MethodReference target_method = invoke->GetTargetMethod(); + HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file); + // Update the element offset in base. + DexCacheArraysLayout layout(kMipsPointerSize, target_method.dex_file); + base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index)); + // Add the special argument base to the method. + DCHECK(!invoke->HasCurrentMethodInput()); + invoke->AddSpecialInput(base); + } + } + + HMipsDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) { + return dex_cache_array_bases_.GetOrCreate( + &dex_file, + [this, &dex_file]() { + HMipsDexCacheArraysBase* base = + new (GetGraph()->GetArena()) HMipsDexCacheArraysBase(dex_file); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBasesIfNeeded(). 
+ entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction()); + return base; + }); + } + + using DexCacheArraysBaseMap = + ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>; + DexCacheArraysBaseMap dex_cache_array_bases_; +}; + +void DexCacheArrayFixups::Run() { + if (graph_->HasIrreducibleLoops()) { + // Do not run this optimization, as irreducible loops do not work with an instruction + // that can be live-in at the irreducible loop header. + return; + } + DexCacheArrayFixupsVisitor visitor(graph_); + visitor.VisitInsertionOrder(); + visitor.MoveBasesIfNeeded(); +} + +} // namespace mips +} // namespace art diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h new file mode 100644 index 0000000000..c8def2842e --- /dev/null +++ b/compiler/optimizing/dex_cache_array_fixups_mips.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ +#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { +namespace mips { + +class DexCacheArrayFixups : public HOptimization { + public: + DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, "dex_cache_array_fixups_mips", stats) {} + + void Run() OVERRIDE; +}; + +} // namespace mips +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 2bd2403dd6..c8cba205fd 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -362,7 +362,7 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { instruction->GetId())); } size_t use_index = use.GetIndex(); - auto&& user_inputs = user->GetInputs(); + HConstInputsRef user_inputs = user->GetInputs(); if ((use_index >= user_inputs.size()) || (user_inputs[use_index] != instruction)) { AddError(StringPrintf("User %s:%d of instruction %s:%d has a wrong " "UseListNode index.", @@ -490,7 +490,7 @@ void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { VisitInstruction(invoke); if (invoke->IsStaticWithExplicitClinitCheck()) { - HInstruction* last_input = invoke->GetInputs().back(); + const HInstruction* last_input = invoke->GetInputs().back(); if (last_input == nullptr) { AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check " "has a null pointer as last input.", @@ -664,17 +664,19 @@ void GraphChecker::HandleLoop(HBasicBlock* loop_header) { } } -static bool IsSameSizeConstant(HInstruction* insn1, HInstruction* insn2) { +static bool IsSameSizeConstant(const HInstruction* insn1, const HInstruction* insn2) { return insn1->IsConstant() && insn2->IsConstant() && Primitive::Is64BitType(insn1->GetType()) == 
Primitive::Is64BitType(insn2->GetType()); } -static bool IsConstantEquivalent(HInstruction* insn1, HInstruction* insn2, BitVector* visited) { +static bool IsConstantEquivalent(const HInstruction* insn1, + const HInstruction* insn2, + BitVector* visited) { if (insn1->IsPhi() && insn1->AsPhi()->IsVRegEquivalentOf(insn2)) { - auto&& insn1_inputs = insn1->GetInputs(); - auto&& insn2_inputs = insn2->GetInputs(); + HConstInputsRef insn1_inputs = insn1->GetInputs(); + HConstInputsRef insn2_inputs = insn2->GetInputs(); if (insn1_inputs.size() != insn2_inputs.size()) { return false; } diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 4af8d1985b..9d67373321 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -511,7 +511,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void PrintInstruction(HInstruction* instruction) { output_ << instruction->DebugName(); - auto&& inputs = instruction->GetInputs(); + HConstInputsRef inputs = instruction->GetInputs(); if (!inputs.empty()) { StringList input_list; for (const HInstruction* input : inputs) { diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 52426d73c6..129c2a94b5 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -341,7 +341,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferPhi(HLoopIn HInstruction* phi, size_t input_index) { // Match all phi inputs from input_index onwards exactly. - auto&& inputs = phi->GetInputs(); + HInputsRef inputs = phi->GetInputs(); DCHECK_LT(input_index, inputs.size()); InductionInfo* a = LookupInfo(loop, inputs[input_index]); for (size_t i = input_index + 1; i < inputs.size(); i++) { @@ -464,7 +464,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferCnv(Inducti HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhi(HInstruction* phi, size_t input_index) { // Match all phi inputs from input_index onwards exactly. - auto&& inputs = phi->GetInputs(); + HInputsRef inputs = phi->GetInputs(); DCHECK_LT(input_index, inputs.size()); auto ita = cycle_.find(inputs[input_index]); if (ita != cycle_.end()) { diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h index f1965f07b2..7c74816c26 100644 --- a/compiler/optimizing/induction_var_analysis.h +++ b/compiler/optimizing/induction_var_analysis.h @@ -132,7 +132,7 @@ class HInductionVarAnalysis : public HOptimization { InductionInfo* a, InductionInfo* b, Primitive::Type type) { - DCHECK(a != nullptr); + DCHECK(a != nullptr && b != nullptr); return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, type); } diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index bc920d96b5..5e587e0810 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -73,9 +73,12 @@ static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) { return v; } -/** - * Corrects a value for type to account for arithmetic wrap-around in lower precision. - */ +/** Helper method to test for a constant value. */ +static bool IsConstantValue(InductionVarRange::Value v) { + return v.is_known && v.a_constant == 0; +} + +/** Corrects a value for type to account for arithmetic wrap-around in lower precision. 
*/ static InductionVarRange::Value CorrectForType(InductionVarRange::Value v, Primitive::Type type) { switch (type) { case Primitive::kPrimShort: @@ -85,26 +88,15 @@ static InductionVarRange::Value CorrectForType(InductionVarRange::Value v, Primi // TODO: maybe some room for improvement, like allowing widening conversions const int32_t min = Primitive::MinValueOfIntegralType(type); const int32_t max = Primitive::MaxValueOfIntegralType(type); - return (v.is_known && v.a_constant == 0 && min <= v.b_constant && v.b_constant <= max) + return (IsConstantValue(v) && min <= v.b_constant && v.b_constant <= max) ? v : InductionVarRange::Value(); } default: - // At int or higher. return v; } } -/** Helper method to test for a constant value. */ -static bool IsConstantValue(InductionVarRange::Value v) { - return v.is_known && v.a_constant == 0; -} - -/** Helper method to test for same constant value. */ -static bool IsSameConstantValue(InductionVarRange::Value v1, InductionVarRange::Value v2) { - return IsConstantValue(v1) && IsConstantValue(v2) && v1.b_constant == v2.b_constant; -} - /** Helper method to insert an instruction. */ static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { DCHECK(block != nullptr); @@ -119,22 +111,22 @@ static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { // InductionVarRange::InductionVarRange(HInductionVarAnalysis* induction_analysis) - : induction_analysis_(induction_analysis) { + : induction_analysis_(induction_analysis), + chase_hint_(nullptr) { DCHECK(induction_analysis != nullptr); } bool InductionVarRange::GetInductionRange(HInstruction* context, HInstruction* instruction, + HInstruction* chase_hint, /*out*/Value* min_val, /*out*/Value* max_val, /*out*/bool* needs_finite_test) { - HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop == nullptr) { - return false; // no loop - } - HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction); - if (info == nullptr) { - return false; // no induction information + HLoopInformation* loop = nullptr; + HInductionVarAnalysis::InductionInfo* info = nullptr; + HInductionVarAnalysis::InductionInfo* trip = nullptr; + if (!HasInductionInfo(context, instruction, &loop, &info, &trip)) { + return false; } // Type int or lower (this is not too restrictive since intended clients, like // bounds check elimination, will have truncated higher precision induction @@ -148,45 +140,15 @@ bool InductionVarRange::GetInductionRange(HInstruction* context, default: return false; } - // Set up loop information. - HBasicBlock* header = loop->GetHeader(); - bool in_body = context->GetBlock() != header; - HInductionVarAnalysis::InductionInfo* trip = - induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); // Find range. + chase_hint_ = chase_hint; + bool in_body = context->GetBlock() != loop->GetHeader(); *min_val = GetVal(info, trip, in_body, /* is_min */ true); *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false)); *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); return true; } -bool InductionVarRange::RefineOuter(/*in-out*/ Value* min_val, - /*in-out*/ Value* max_val) const { - if (min_val->instruction != nullptr || max_val->instruction != nullptr) { - Value v1_min = RefineOuter(*min_val, /* is_min */ true); - Value v2_max = RefineOuter(*max_val, /* is_min */ false); - // The refined range is safe if both sides refine the same instruction. 
Otherwise, since two - // different ranges are combined, the new refined range is safe to pass back to the client if - // the extremes of the computed ranges ensure no arithmetic wrap-around anomalies occur. - if (min_val->instruction != max_val->instruction) { - Value v1_max = RefineOuter(*min_val, /* is_min */ false); - Value v2_min = RefineOuter(*max_val, /* is_min */ true); - if (!IsConstantValue(v1_max) || - !IsConstantValue(v2_min) || - v1_max.b_constant > v2_min.b_constant) { - return false; - } - } - // Did something change? - if (v1_min.instruction != min_val->instruction || v2_max.instruction != max_val->instruction) { - *min_val = v1_min; - *max_val = v2_max; - return true; - } - } - return false; -} - bool InductionVarRange::CanGenerateCode(HInstruction* context, HInstruction* instruction, /*out*/bool* needs_finite_test, @@ -226,7 +188,7 @@ void InductionVarRange::GenerateTakenTest(HInstruction* context, bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info, ConstantRequest request, - /*out*/ int64_t *value) const { + /*out*/ int64_t* value) const { if (info != nullptr) { // A direct 32-bit or 64-bit constant fetch. This immediately satisfies // any of the three requests (kExact, kAtMost, and KAtLeast). @@ -236,27 +198,16 @@ bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info, return true; } } - // Try range analysis while traversing outward on loops. - bool in_body = true; // no known trip count - Value v_min = GetVal(info, nullptr, in_body, /* is_min */ true); - Value v_max = GetVal(info, nullptr, in_body, /* is_min */ false); - do { - // Make sure *both* extremes are known to avoid arithmetic wrap-around anomalies. - if (IsConstantValue(v_min) && IsConstantValue(v_max) && v_min.b_constant <= v_max.b_constant) { - if ((request == kExact && v_min.b_constant == v_max.b_constant) || request == kAtMost) { - *value = v_max.b_constant; - return true; - } else if (request == kAtLeast) { - *value = v_min.b_constant; - return true; - } - } - } while (RefineOuter(&v_min, &v_max)); - // Exploit array length + c >= c, with c <= 0 to avoid arithmetic wrap-around anomalies - // (e.g. array length == maxint and c == 1 would yield minint). - if (request == kAtLeast) { - if (v_min.a_constant == 1 && v_min.b_constant <= 0 && v_min.instruction->IsArrayLength()) { - *value = v_min.b_constant; + // Try range analysis on the invariant, but only on proper range to avoid wrap-around anomalies. 
+ Value min_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ true); + Value max_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ false); + if (IsConstantValue(min_val) && + IsConstantValue(max_val) && min_val.b_constant <= max_val.b_constant) { + if ((request == kExact && min_val.b_constant == max_val.b_constant) || request == kAtMost) { + *value = max_val.b_constant; + return true; + } else if (request == kAtLeast) { + *value = min_val.b_constant; return true; } } @@ -264,6 +215,51 @@ bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info, return false; } +bool InductionVarRange::HasInductionInfo( + HInstruction* context, + HInstruction* instruction, + /*out*/ HLoopInformation** loop, + /*out*/ HInductionVarAnalysis::InductionInfo** info, + /*out*/ HInductionVarAnalysis::InductionInfo** trip) const { + HLoopInformation* l = context->GetBlock()->GetLoopInformation(); // closest enveloping loop + if (l != nullptr) { + HInductionVarAnalysis::InductionInfo* i = induction_analysis_->LookupInfo(l, instruction); + if (i != nullptr) { + *loop = l; + *info = i; + *trip = induction_analysis_->LookupInfo(l, l->GetHeader()->GetLastInstruction()); + return true; + } + } + return false; +} + +bool InductionVarRange::IsWellBehavedTripCount(HInductionVarAnalysis::InductionInfo* trip) const { + if (trip != nullptr) { + // Both bounds that define a trip-count are well-behaved if they either are not defined + // in any loop, or are contained in a proper interval. This allows finding the min/max + // of an expression by chasing outward. + InductionVarRange range(induction_analysis_); + HInductionVarAnalysis::InductionInfo* lower = trip->op_b->op_a; + HInductionVarAnalysis::InductionInfo* upper = trip->op_b->op_b; + int64_t not_used = 0; + return (!HasFetchInLoop(lower) || range.IsConstant(lower, kAtLeast, ¬_used)) && + (!HasFetchInLoop(upper) || range.IsConstant(upper, kAtLeast, ¬_used)); + } + return true; +} + +bool InductionVarRange::HasFetchInLoop(HInductionVarAnalysis::InductionInfo* info) const { + if (info != nullptr) { + if (info->induction_class == HInductionVarAnalysis::kInvariant && + info->operation == HInductionVarAnalysis::kFetch) { + return info->fetch->GetBlock()->GetLoopInformation() != nullptr; + } + return HasFetchInLoop(info->op_a) || HasFetchInLoop(info->op_b); + } + return false; +} + bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const { if (info != nullptr) { if (info->induction_class == HInductionVarAnalysis::kLinear) { @@ -299,13 +295,13 @@ InductionVarRange::Value InductionVarRange::GetLinear(HInductionVarAnalysis::Ind HInductionVarAnalysis::InductionInfo* trip, bool in_body, bool is_min) const { - // Detect common situation where an offset inside the trip count cancels out during range + // Detect common situation where an offset inside the trip-count cancels out during range // analysis (finding max a * (TC - 1) + OFFSET for a == 1 and TC = UPPER - OFFSET or finding // min a * (TC - 1) + OFFSET for a == -1 and TC = OFFSET - UPPER) to avoid losing information // with intermediate results that only incorporate single instructions. 
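To make the cancellation in the comment above concrete (a worked instance with made-up bounds, not code from the patch): for a loop `for (int i = OFFSET; i < UPPER; i++)`, the linear induction is 1 * i + OFFSET with trip-count TC = UPPER - OFFSET, and substituting TC before evaluating the two factors separately keeps the symbolic UPPER instead of losing it:

    // max(1 * (TC - 1) + OFFSET) = (UPPER - OFFSET - 1) + OFFSET = UPPER - 1
    constexpr int64_t kOffset = 3, kUpper = 100;   // hypothetical bounds
    constexpr int64_t kTripCount = kUpper - kOffset;
    static_assert((kTripCount - 1) + kOffset == kUpper - 1,
                  "offset cancels; the body range of i is [kOffset, kUpper - 1]");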
if (trip != nullptr) { HInductionVarAnalysis::InductionInfo* trip_expr = trip->op_a; - if (trip_expr->operation == HInductionVarAnalysis::kSub) { + if (trip_expr->type == info->type && trip_expr->operation == HInductionVarAnalysis::kSub) { int64_t stride_value = 0; if (IsConstant(info->op_a, kExact, &stride_value)) { if (!is_min && stride_value == 1) { @@ -349,12 +345,25 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, HInductionVarAnalysis::InductionInfo* trip, bool in_body, bool is_min) const { - // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes - // more likely range analysis will compare the same instructions as terminal nodes. + // Stop chasing the instruction at constant or hint. int64_t value; - if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) { + if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) { return Value(static_cast<int32_t>(value)); - } else if (instruction->IsAdd()) { + } else if (instruction == chase_hint_) { + return Value(instruction, 1, 0); + } + // Special cases when encountering a single instruction that denotes trip count in the + // loop-body: min is 1 and, when chasing constants, max of safe trip-count is max int + if (in_body && trip != nullptr && instruction == trip->op_a->fetch) { + if (is_min) { + return Value(1); + } else if (chase_hint_ == nullptr && !IsUnsafeTripCount(trip)) { + return Value(std::numeric_limits<int32_t>::max()); + } + } + // Chase the instruction a bit deeper into the HIR tree, so that it becomes more likely + // range analysis will compare the same instructions as terminal nodes. + if (instruction->IsAdd()) { if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) { return AddValue(Value(static_cast<int32_t>(value)), GetFetch(instruction->InputAt(1), trip, in_body, is_min)); @@ -362,19 +371,35 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(static_cast<int32_t>(value))); } - } else if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { - return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min); + } else if (instruction->IsArrayLength()) { + // Return extreme values when chasing constants. Otherwise, chase deeper. + if (chase_hint_ == nullptr) { + return is_min ? Value(0) : Value(std::numeric_limits<int32_t>::max()); + } else if (instruction->InputAt(0)->IsNewArray()) { + return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min); + } } else if (instruction->IsTypeConversion()) { // Since analysis is 32-bit (or narrower) we allow a widening along the path. if (instruction->AsTypeConversion()->GetInputType() == Primitive::kPrimInt && instruction->AsTypeConversion()->GetResultType() == Primitive::kPrimLong) { return GetFetch(instruction->InputAt(0), trip, in_body, is_min); } - } else if (is_min) { - // Special case for finding minimum: minimum of trip-count in loop-body is 1. - if (trip != nullptr && in_body && instruction == trip->op_a->fetch) { - return Value(1); - } + } + // Chase an invariant fetch that is defined by an outer loop if the trip-count used + // so far is well-behaved in both bounds and the next trip-count is safe. 
+ // Example: + // for (int i = 0; i <= 100; i++) // safe + // for (int j = 0; j <= i; j++) // well-behaved + // j is in range [0, i ] (if i is chase hint) + // or in range [0, 100] (otherwise) + HLoopInformation* next_loop = nullptr; + HInductionVarAnalysis::InductionInfo* next_info = nullptr; + HInductionVarAnalysis::InductionInfo* next_trip = nullptr; + bool next_in_body = true; // inner loop is always in body of outer loop + if (HasInductionInfo(instruction, instruction, &next_loop, &next_info, &next_trip) && + IsWellBehavedTripCount(trip) && + !IsUnsafeTripCount(next_trip)) { + return GetVal(next_info, next_trip, next_in_body, is_min); } return Value(instruction, 1, 0); } @@ -421,9 +446,8 @@ InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::Induct break; } break; - case HInductionVarAnalysis::kLinear: { + case HInductionVarAnalysis::kLinear: return CorrectForType(GetLinear(info, trip, in_body, is_min), info->type); - } case HInductionVarAnalysis::kWrapAround: case HInductionVarAnalysis::kPeriodic: return MergeVal(GetVal(info->op_a, trip, in_body, is_min), @@ -438,20 +462,18 @@ InductionVarRange::Value InductionVarRange::GetMul(HInductionVarAnalysis::Induct HInductionVarAnalysis::InductionInfo* trip, bool in_body, bool is_min) const { + // Constant times range. + int64_t value = 0; + if (IsConstant(info1, kExact, &value)) { + return MulRangeAndConstant(value, info2, trip, in_body, is_min); + } else if (IsConstant(info2, kExact, &value)) { + return MulRangeAndConstant(value, info1, trip, in_body, is_min); + } + // Interval ranges. Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); - // Try to refine first operand. - if (!IsConstantValue(v1_min) && !IsConstantValue(v1_max)) { - RefineOuter(&v1_min, &v1_max); - } - // Constant times range. - if (IsSameConstantValue(v1_min, v1_max)) { - return MulRangeAndConstant(v2_min, v2_max, v1_min, is_min); - } else if (IsSameConstantValue(v2_min, v2_max)) { - return MulRangeAndConstant(v1_min, v1_max, v2_min, is_min); - } // Positive range vs. positive or negative range. if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) { if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { @@ -476,14 +498,16 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct HInductionVarAnalysis::InductionInfo* trip, bool in_body, bool is_min) const { + // Range divided by constant. + int64_t value = 0; + if (IsConstant(info2, kExact, &value)) { + return DivRangeAndConstant(value, info1, trip, in_body, is_min); + } + // Interval ranges. Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); - // Range divided by constant. - if (IsSameConstantValue(v2_min, v2_max)) { - return DivRangeAndConstant(v1_min, v1_max, v2_min, is_min); - } // Positive range vs. positive or negative range. 
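A note on the sign handling in the new MulRangeAndConstant/DivRangeAndConstant helpers, whose definitions follow just below: the `is_min == value >= 0` argument parses as `is_min == (value >= 0)` and selects which extreme of the range to scale: a non-negative constant keeps the roles of min and max, while a negative constant swaps them. A hypothetical restatement of the rule:

    #include <cstdint>

    struct Range { int64_t min, max; };

    // For c >= 0 the extremes keep their roles; for c < 0 they swap.
    Range MulByConstant(Range r, int64_t c) {
      return (c >= 0) ? Range{r.min * c, r.max * c}
                      : Range{r.max * c, r.min * c};
    }

For example, MulByConstant({2, 5}, -3) yields {-15, -6}.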
if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) { if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { @@ -503,18 +527,30 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct return Value(); } -InductionVarRange::Value InductionVarRange::MulRangeAndConstant(Value v_min, - Value v_max, - Value c, - bool is_min) const { - return is_min == (c.b_constant >= 0) ? MulValue(v_min, c) : MulValue(v_max, c); +InductionVarRange::Value InductionVarRange::MulRangeAndConstant( + int64_t value, + HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const { + if (CanLongValueFitIntoInt(value)) { + Value c(static_cast<int32_t>(value)); + return MulValue(GetVal(info, trip, in_body, is_min == value >= 0), c); + } + return Value(); } -InductionVarRange::Value InductionVarRange::DivRangeAndConstant(Value v_min, - Value v_max, - Value c, - bool is_min) const { - return is_min == (c.b_constant >= 0) ? DivValue(v_min, c) : DivValue(v_max, c); +InductionVarRange::Value InductionVarRange::DivRangeAndConstant( + int64_t value, + HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const { + if (CanLongValueFitIntoInt(value)) { + Value c(static_cast<int32_t>(value)); + return DivValue(GetVal(info, trip, in_body, is_min == value >= 0), c); + } + return Value(); } InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) const { @@ -580,28 +616,6 @@ InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is return Value(); } -InductionVarRange::Value InductionVarRange::RefineOuter(Value v, bool is_min) const { - if (v.instruction == nullptr) { - return v; // nothing to refine - } - HLoopInformation* loop = - v.instruction->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop == nullptr) { - return v; // no loop - } - HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, v.instruction); - if (info == nullptr) { - return v; // no induction information - } - // Set up loop information. - HBasicBlock* header = loop->GetHeader(); - bool in_body = true; // inner always in more outer - HInductionVarAnalysis::InductionInfo* trip = - induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); - // Try to refine "a x instruction + b" with outer loop range information on instruction. - return AddValue(MulValue(Value(v.a_constant), GetVal(info, trip, in_body, is_min)), Value(v.b_constant)); -} - bool InductionVarRange::GenerateCode(HInstruction* context, HInstruction* instruction, HGraph* graph, @@ -611,27 +625,18 @@ bool InductionVarRange::GenerateCode(HInstruction* context, /*out*/HInstruction** taken_test, /*out*/bool* needs_finite_test, /*out*/bool* needs_taken_test) const { - HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop == nullptr) { - return false; // no loop - } - HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction); - if (info == nullptr) { - return false; // no induction information - } - // Set up loop information. 
- HBasicBlock* header = loop->GetHeader(); - bool in_body = context->GetBlock() != header; - HInductionVarAnalysis::InductionInfo* trip = - induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); - if (trip == nullptr) { - return false; // codegen relies on trip count + HLoopInformation* loop = nullptr; + HInductionVarAnalysis::InductionInfo* info = nullptr; + HInductionVarAnalysis::InductionInfo* trip = nullptr; + if (!HasInductionInfo(context, instruction, &loop, &info, &trip) || trip == nullptr) { + return false; // codegen needs all information, including tripcount } // Determine what tests are needed. A finite test is needed if the evaluation code uses the // trip-count and the loop maybe unsafe (because in such cases, the index could "overshoot" // the computed range). A taken test is needed for any unknown trip-count, even if evaluation // code does not use the trip-count explicitly (since there could be an implicit relation // between e.g. an invariant subscript and a not-taken condition). + bool in_body = context->GetBlock() != loop->GetHeader(); *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); *needs_taken_test = IsBodyTripCount(trip); // Code generation for taken test: generate the code when requested or otherwise analyze diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 0af41560ff..00aaa167f8 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -57,21 +57,19 @@ class InductionVarRange { explicit InductionVarRange(HInductionVarAnalysis* induction); /** - * Given a context denoted by the first instruction, returns a possibly conservative - * lower and upper bound on the instruction's value in the output parameters min_val - * and max_val, respectively. The need_finite_test flag denotes if an additional finite-test - * is needed to protect the range evaluation inside its loop. Returns false on failure. + * Given a context denoted by the first instruction, returns a possibly conservative lower + * and upper bound on the instruction's value in the output parameters min_val and max_val, + * respectively. The need_finite_test flag denotes if an additional finite-test is needed + * to protect the range evaluation inside its loop. The parameter chase_hint defines an + * instruction at which chasing may stop. Returns false on failure. */ bool GetInductionRange(HInstruction* context, HInstruction* instruction, + HInstruction* chase_hint, /*out*/ Value* min_val, /*out*/ Value* max_val, /*out*/ bool* needs_finite_test); - /** Refines the values with induction of next outer loop. Returns true on change. */ - bool RefineOuter(/*in-out*/ Value* min_val, - /*in-out*/ Value* max_val) const; - /** * Returns true if range analysis is able to generate code for the lower and upper * bound expressions on the instruction in the given context. The need_finite_test @@ -132,11 +130,20 @@ class InductionVarRange { */ bool IsConstant(HInductionVarAnalysis::InductionInfo* info, ConstantRequest request, - /*out*/ int64_t *value) const; + /*out*/ int64_t* value) const; + + /** Returns whether induction information can be obtained. 
*/ + bool HasInductionInfo(HInstruction* context, + HInstruction* instruction, + /*out*/ HLoopInformation** loop, + /*out*/ HInductionVarAnalysis::InductionInfo** info, + /*out*/ HInductionVarAnalysis::InductionInfo** trip) const; + bool HasFetchInLoop(HInductionVarAnalysis::InductionInfo* info) const; bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const; bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const; bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const; + bool IsWellBehavedTripCount(HInductionVarAnalysis::InductionInfo* trip) const; Value GetLinear(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* trip, @@ -161,8 +168,16 @@ class InductionVarRange { bool in_body, bool is_min) const; - Value MulRangeAndConstant(Value v1, Value v2, Value c, bool is_min) const; - Value DivRangeAndConstant(Value v1, Value v2, Value c, bool is_min) const; + Value MulRangeAndConstant(int64_t value, + HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const; + Value DivRangeAndConstant(int64_t value, + HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const; Value AddValue(Value v1, Value v2) const; Value SubValue(Value v1, Value v2) const; @@ -171,12 +186,6 @@ class InductionVarRange { Value MergeVal(Value v1, Value v2, bool is_min) const; /** - * Returns refined value using induction of next outer loop or the input value if no - * further refinement is possible. - */ - Value RefineOuter(Value val, bool is_min) const; - - /** * Generates code for lower/upper/taken-test in the HIR. Returns true on success. * With values nullptr, the method can be used to determine if code generation * would be successful without generating actual code yet. @@ -200,7 +209,10 @@ class InductionVarRange { bool is_min) const; /** Results of prior induction variable analysis. */ - HInductionVarAnalysis *induction_analysis_; + HInductionVarAnalysis* induction_analysis_; + + /** Instruction at which chasing may stop. */ + HInstruction* chase_hint_; friend class HInductionVarAnalysis; friend class InductionVarRangeTest; diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index dc04dc2c49..4ea170f659 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -66,6 +66,8 @@ class InductionVarRangeTest : public CommonCompilerTest { entry_block_->AddInstruction(x_); y_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); entry_block_->AddInstruction(y_); + // Set arbitrary range analysis hint while testing private methods. + SetHint(x_); } /** Constructs loop with given upper bound. */ @@ -111,6 +113,11 @@ class InductionVarRangeTest : public CommonCompilerTest { iva_->Run(); } + /** Sets hint. */ + void SetHint(HInstruction* hint) { + range_.chase_hint_ = hint; + } + /** Constructs an invariant. 
*/ HInductionVarAnalysis::InductionInfo* CreateInvariant(char opc, HInductionVarAnalysis::InductionInfo* a, @@ -122,6 +129,7 @@ class InductionVarRangeTest : public CommonCompilerTest { case 'n': op = HInductionVarAnalysis::kNeg; break; case '*': op = HInductionVarAnalysis::kMul; break; case '/': op = HInductionVarAnalysis::kDiv; break; + case '<': op = HInductionVarAnalysis::kLT; break; default: op = HInductionVarAnalysis::kNop; break; } return iva_->CreateInvariantOp(op, a, b); @@ -137,22 +145,21 @@ class InductionVarRangeTest : public CommonCompilerTest { return CreateFetch(graph_->GetIntConstant(c)); } - /** Constructs a trip-count. */ + /** Constructs a constant trip-count. */ HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc, bool in_loop, bool safe) { - Primitive::Type type = Primitive::kPrimInt; + HInductionVarAnalysis::InductionOp op = HInductionVarAnalysis::kTripCountInBodyUnsafe; if (in_loop && safe) { - return iva_->CreateTripCount( - HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr, type); + op = HInductionVarAnalysis::kTripCountInLoop; } else if (in_loop) { - return iva_->CreateTripCount( - HInductionVarAnalysis::kTripCountInLoopUnsafe, CreateConst(tc), nullptr, type); + op = HInductionVarAnalysis::kTripCountInLoopUnsafe; } else if (safe) { - return iva_->CreateTripCount( - HInductionVarAnalysis::kTripCountInBody, CreateConst(tc), nullptr, type); - } else { - return iva_->CreateTripCount( - HInductionVarAnalysis::kTripCountInBodyUnsafe, CreateConst(tc), nullptr, type); + op = HInductionVarAnalysis::kTripCountInBody; } + // Return TC with taken-test 0 < TC. + return iva_->CreateTripCount(op, + CreateConst(tc), + CreateInvariant('<', CreateConst(0), CreateConst(tc)), + Primitive::kPrimInt); } /** Constructs a linear a * i + b induction. */ @@ -197,13 +204,13 @@ class InductionVarRangeTest : public CommonCompilerTest { } Value GetMin(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* induc) { - return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ true); + HInductionVarAnalysis::InductionInfo* trip) { + return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ true); } Value GetMax(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* induc) { - return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ false); + HInductionVarAnalysis::InductionInfo* trip) { + return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ false); } Value GetMul(HInductionVarAnalysis::InductionInfo* info1, @@ -558,6 +565,31 @@ TEST_F(InductionVarRangeTest, MaxValue) { ExpectEqual(Value(), MaxValue(Value(55), Value(y_, 1, -50))); } +TEST_F(InductionVarRangeTest, ArrayLengthAndHints) { + HInstruction* new_array = new (&allocator_) + HNewArray(x_, + graph_->GetCurrentMethod(), + 0, Primitive::kPrimInt, + graph_->GetDexFile(), + kQuickAllocArray); + entry_block_->AddInstruction(new_array); + HInstruction* array_length = new (&allocator_) HArrayLength(new_array, 0); + entry_block_->AddInstruction(array_length); + // With null hint: yields extreme constants. + const int32_t max_value = std::numeric_limits<int32_t>::max(); + SetHint(nullptr); + ExpectEqual(Value(0), GetMin(CreateFetch(array_length), nullptr)); + ExpectEqual(Value(max_value), GetMax(CreateFetch(array_length), nullptr)); + // With explicit hint: yields the length instruction. 
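// A minimal model of the chasing-with-hint behavior asserted next (a sketch
// with hypothetical Instr/Chase names, not ART code): with the hint set to
// the array length, chasing stops at the length instruction; with any other
// non-null hint, the walk continues straight through it.
struct Instr {
  Instr* source;  // origin of the value; nullptr when opaque
};
Instr* Chase(Instr* fetch, Instr* hint) {
  // Follow a fetched value toward its declaration, but stop early at the
  // hint so the hint itself becomes the reported bound.
  while (fetch != hint && fetch->source != nullptr) {
    fetch = fetch->source;
  }
  return fetch;
}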
+ SetHint(array_length); + ExpectEqual(Value(array_length, 1, 0), GetMin(CreateFetch(array_length), nullptr)); + ExpectEqual(Value(array_length, 1, 0), GetMax(CreateFetch(array_length), nullptr)); + // With any non-null hint: chases beyond the length instruction. + SetHint(x_); + ExpectEqual(Value(x_, 1, 0), GetMin(CreateFetch(array_length), nullptr)); + ExpectEqual(Value(x_, 1, 0), GetMax(CreateFetch(array_length), nullptr)); +} + // // Tests on public methods. // @@ -570,23 +602,20 @@ TEST_F(InductionVarRangeTest, ConstantTripCountUp) { bool needs_finite_test = true; // In context of header: known. - range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(1000), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); // In context of loop-body: known. - range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(999), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); - range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(1), v1); ExpectEqual(Value(1000), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); } TEST_F(InductionVarRangeTest, ConstantTripCountDown) { @@ -597,23 +626,20 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) { bool needs_finite_test = true; // In context of header: known. - range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(1000), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); // In context of loop-body: known. - range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(1), v1); ExpectEqual(Value(1000), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); - range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(999), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); } TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { @@ -625,23 +651,20 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { bool needs_taken_test = true; // In context of header: upper unknown. - range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); // In context of loop-body: known. 
- range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(x_, 1, -1), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); - range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(1), v1); ExpectEqual(Value(x_, 1, 0), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); HInstruction* lower = nullptr; HInstruction* upper = nullptr; @@ -695,23 +718,20 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { bool needs_taken_test = true; // In context of header: lower unknown. - range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(), v1); ExpectEqual(Value(1000), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); // In context of loop-body: known. - range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(x_, 1, 1), v1); ExpectEqual(Value(1000), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); - range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(x_, 1, 0), v1); ExpectEqual(Value(999), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); HInstruction* lower = nullptr; HInstruction* upper = nullptr; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 8f2db3d1d3..6c1292cf66 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -187,12 +187,12 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resol static uint32_t FindMethodIndexIn(ArtMethod* method, const DexFile& dex_file, - uint32_t referrer_index) + uint32_t name_and_signature_index) SHARED_REQUIRES(Locks::mutator_lock_) { if (IsSameDexFile(*method->GetDexFile(), dex_file)) { return method->GetDexMethodIndex(); } else { - return method->FindDexMethodIndexInOtherDexFile(dex_file, referrer_index); + return method->FindDexMethodIndexInOtherDexFile(dex_file, name_and_signature_index); } } @@ -750,7 +750,40 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction, bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) { HInstruction* return_replacement = nullptr; if (!TryBuildAndInline(invoke_instruction, method, &return_replacement)) { - return false; + if (invoke_instruction->IsInvokeInterface()) { + // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always + // better than an invoke-interface because: + // 1) In the best case, the interface call has one more indirection (to fetch the IMT). + // 2) We will not go to the conflict trampoline with an invoke-virtual. + // TODO: Consider sharpening once it is not dependent on the compiler driver. 
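// The two dispatch shapes contrasted above, as a self-contained sketch
// (hypothetical types and an illustrative 64-slot IMT, not ART runtime code):
#include <cstddef>
struct Method {
  void (*entry)();
};
struct Class {
  Method** vtable;   // invoke-virtual: direct index into the vtable
  Method* imt[64];   // invoke-interface: hashed interface method table slot
};
void InvokeVirtual(Class* klass, size_t vtable_index) {
  klass->vtable[vtable_index]->entry();  // one table load, then the call
}
void InvokeInterface(Class* klass, size_t imt_hash) {
  // One extra indirection to locate the IMT slot; two interfaces hashing to
  // the same slot would force a detour through a conflict trampoline.
  klass->imt[imt_hash % 64]->entry();
}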
+ const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); + uint32_t method_index = FindMethodIndexIn( + method, caller_dex_file, invoke_instruction->GetDexMethodIndex()); + if (method_index == DexFile::kDexNoIndex) { + return false; + } + HInvokeVirtual* new_invoke = new (graph_->GetArena()) HInvokeVirtual( + graph_->GetArena(), + invoke_instruction->GetNumberOfArguments(), + invoke_instruction->GetType(), + invoke_instruction->GetDexPc(), + method_index, + method->GetMethodIndex()); + HInputsRef inputs = invoke_instruction->GetInputs(); + for (size_t index = 0; index != inputs.size(); ++index) { + new_invoke->SetArgumentAt(index, inputs[index]); + } + invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction); + new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); + if (invoke_instruction->GetType() == Primitive::kPrimNot) { + new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo()); + } + return_replacement = new_invoke; + } else { + // TODO: Consider sharpening an invoke virtual once it is not dependent on the + // compiler driver. + return false; + } } if (return_replacement != nullptr) { invoke_instruction->ReplaceWith(return_replacement); @@ -1239,14 +1272,6 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, return false; } - if (current->IsInvokeInterface()) { - // Disable inlining of interface calls. The cost in case of entering the - // resolution conflict is currently too high. - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because it has an interface call."; - return false; - } - if (!same_dex_file && current->NeedsEnvironment()) { VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because " << current->DebugName() diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 3041c4d2c7..e0410dcdb2 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -54,6 +54,9 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { // De Morgan's laws: // ~a & ~b = ~(a | b) and ~a | ~b = ~(a & b) bool TryDeMorganNegationFactoring(HBinaryOperation* op); + bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction); + bool TrySubtractionChainSimplification(HBinaryOperation* instruction); + void VisitShift(HBinaryOperation* shift); void VisitEqual(HEqual* equal) OVERRIDE; @@ -963,7 +966,18 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { return; } - TryReplaceWithRotate(instruction); + if (TryReplaceWithRotate(instruction)) { + return; + } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. + TryHandleAssociativeAndCommutativeOperation(instruction); + + if ((instruction->GetLeft()->IsSub() || instruction->GetRight()->IsSub()) && + TrySubtractionChainSimplification(instruction)) { + return; + } } void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { @@ -1025,7 +1039,13 @@ void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { return; } - TryDeMorganNegationFactoring(instruction); + if (TryDeMorganNegationFactoring(instruction)) { + return; + } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. 
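// (In other words, the helper below only rewires the operands of the
// instruction it is handed; the instruction itself stays in the graph either
// way, so falling through after a successful rewrite is safe.)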
+ TryHandleAssociativeAndCommutativeOperation(instruction); } void InstructionSimplifierVisitor::VisitGreaterThan(HGreaterThan* condition) { @@ -1242,6 +1262,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { instruction->ReplaceWith(input_cst); instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); + return; } else if (IsPowerOfTwo(factor)) { // Replace code looking like // MUL dst, src, pow_of_2 @@ -1251,6 +1272,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { HShl* shl = new (allocator) HShl(type, input_other, shift); block->ReplaceAndRemoveInstructionWith(instruction, shl); RecordSimplification(); + return; } else if (IsPowerOfTwo(factor - 1)) { // Transform code looking like // MUL dst, src, (2^n + 1) @@ -1265,6 +1287,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { block->InsertInstructionBefore(shl, instruction); block->ReplaceAndRemoveInstructionWith(instruction, add); RecordSimplification(); + return; } else if (IsPowerOfTwo(factor + 1)) { // Transform code looking like // MUL dst, src, (2^n - 1) @@ -1279,8 +1302,13 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { block->InsertInstructionBefore(shl, instruction); block->ReplaceAndRemoveInstructionWith(instruction, sub); RecordSimplification(); + return; } } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. + TryHandleAssociativeAndCommutativeOperation(instruction); } void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) { @@ -1380,7 +1408,13 @@ void InstructionSimplifierVisitor::VisitOr(HOr* instruction) { if (TryDeMorganNegationFactoring(instruction)) return; - TryReplaceWithRotate(instruction); + if (TryReplaceWithRotate(instruction)) { + return; + } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. + TryHandleAssociativeAndCommutativeOperation(instruction); } void InstructionSimplifierVisitor::VisitShl(HShl* instruction) { @@ -1471,6 +1505,11 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); left->GetBlock()->RemoveInstruction(left); + return; + } + + if (TrySubtractionChainSimplification(instruction)) { + return; } } @@ -1524,7 +1563,13 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { return; } - TryReplaceWithRotate(instruction); + if (TryReplaceWithRotate(instruction)) { + return; + } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. + TryHandleAssociativeAndCommutativeOperation(instruction); } void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { @@ -1823,4 +1868,150 @@ void InstructionSimplifierVisitor::VisitDeoptimize(HDeoptimize* deoptimize) { } } +// Replace code looking like +// OP y, x, const1 +// OP z, y, const2 +// with +// OP z, x, const3 +// where OP is both an associative and a commutative operation. +bool InstructionSimplifierVisitor::TryHandleAssociativeAndCommutativeOperation( + HBinaryOperation* instruction) { + DCHECK(instruction->IsCommutative()); + + if (!Primitive::IsIntegralType(instruction->GetType())) { + return false; + } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + // Variable names as described above. 
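// A concrete instance of the pattern (hypothetical constants):
//   y = HAdd(x, 3)   // const1 = 3
//   z = HAdd(y, 4)   // const2 = 4
// becomes, after the rewrite below,
//   z = HAdd(x, 7)   // const3 = TryStaticEvaluation() of 3 + 4
// leaving y for dead code elimination once it has no other users.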
+ HConstant* const2; + HBinaryOperation* y; + + if (instruction->InstructionTypeEquals(left) && right->IsConstant()) { + const2 = right->AsConstant(); + y = left->AsBinaryOperation(); + } else if (left->IsConstant() && instruction->InstructionTypeEquals(right)) { + const2 = left->AsConstant(); + y = right->AsBinaryOperation(); + } else { + // The node does not match the pattern. + return false; + } + + // If `y` has more than one use, we do not perform the optimization + // because it might increase code size (e.g. if the new constant is + // no longer encodable as an immediate operand in the target ISA). + if (!y->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + // GetConstantRight() can return both left and right constants + // for commutative operations. + HConstant* const1 = y->GetConstantRight(); + if (const1 == nullptr) { + return false; + } + + instruction->ReplaceInput(const1, 0); + instruction->ReplaceInput(const2, 1); + HConstant* const3 = instruction->TryStaticEvaluation(); + DCHECK(const3 != nullptr); + instruction->ReplaceInput(y->GetLeastConstantLeft(), 0); + instruction->ReplaceInput(const3, 1); + RecordSimplification(); + return true; +} + +static HBinaryOperation* AsAddOrSub(HInstruction* binop) { + return (binop->IsAdd() || binop->IsSub()) ? binop->AsBinaryOperation() : nullptr; +} + +// Helper function that performs addition statically, considering the result type. +static int64_t ComputeAddition(Primitive::Type type, int64_t x, int64_t y) { + // Use the Compute() method for consistency with TryStaticEvaluation(). + if (type == Primitive::kPrimInt) { + return HAdd::Compute<int32_t>(x, y); + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + return HAdd::Compute<int64_t>(x, y); + } +} + +// Helper function that handles the child classes of HConstant +// and returns an integer with the appropriate sign. +static int64_t GetValue(HConstant* constant, bool is_negated) { + int64_t ret = Int64FromConstant(constant); + return is_negated ? -ret : ret; +} + +// Replace code looking like +// OP1 y, x, const1 +// OP2 z, y, const2 +// with +// OP3 z, x, const3 +// where OPx is either ADD or SUB, and at least one of OP{1,2} is SUB. +bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( + HBinaryOperation* instruction) { + DCHECK(instruction->IsAdd() || instruction->IsSub()) << instruction->DebugName(); + + Primitive::Type type = instruction->GetType(); + if (!Primitive::IsIntegralType(type)) { + return false; + } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + // Variable names as described above. + HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstant(); + if (const2 == nullptr) { + return false; + } + + HBinaryOperation* y = (AsAddOrSub(left) != nullptr) + ? left->AsBinaryOperation() + : AsAddOrSub(right); + // If y has more than one use, we do not perform the optimization because + // it might increase code size (e.g. if the new constant is no longer + // encodable as an immediate operand in the target ISA). + if ((y == nullptr) || !y->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + left = y->GetLeft(); + HConstant* const1 = left->IsConstant() ? left->AsConstant() : y->GetRight()->AsConstant(); + if (const1 == nullptr) { + return false; + } + + HInstruction* x = (const1 == left) ? y->GetRight() : left; + // If both inputs are constants, let the constant folding pass deal with it. 
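// A concrete instance of the sign bookkeeping below (hypothetical constants):
//   y = HSub(x, 5)   // const1 = 5
//   z = HSub(3, y)   // const2 = 3; the outer SUB negates y
// Negating y flips the signs of both x and 5, so const1 contributes +5,
// const3 = 5 + 3 = 8, and x keeps the outer negation:
//   z = HSub(8, x)   // check with x = 2: 3 - (2 - 5) = 6 = 8 - 2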
+ if (x->IsConstant()) { + return false; + } + + bool is_const2_negated = (const2 == right) && instruction->IsSub(); + int64_t const2_val = GetValue(const2, is_const2_negated); + bool is_y_negated = (y == right) && instruction->IsSub(); + right = y->GetRight(); + bool is_const1_negated = is_y_negated ^ ((const1 == right) && y->IsSub()); + int64_t const1_val = GetValue(const1, is_const1_negated); + bool is_x_negated = is_y_negated ^ ((x == right) && y->IsSub()); + int64_t const3_val = ComputeAddition(type, const1_val, const2_val); + HBasicBlock* block = instruction->GetBlock(); + HConstant* const3 = block->GetGraph()->GetConstant(type, const3_val); + ArenaAllocator* arena = instruction->GetArena(); + HInstruction* z; + + if (is_x_negated) { + z = new (arena) HSub(type, const3, x, instruction->GetDexPc()); + } else { + z = new (arena) HAdd(type, x, const3, instruction->GetDexPc()); + } + + block->ReplaceAndRemoveInstructionWith(instruction, z); + RecordSimplification(); + return true; +} + } // namespace art diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index f43f8edf06..579fb9d3bb 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1365,7 +1365,6 @@ static void CheckPosition(ArmAssembler* assembler, Register input, Location length, SlowPathCode* slow_path, - Register input_len, Register temp, bool length_is_input_length = false) { // Where is the length in the Array? @@ -1386,8 +1385,8 @@ static void CheckPosition(ArmAssembler* assembler, } } else { // Check that length(input) >= pos. - __ LoadFromOffset(kLoadWord, input_len, input, length_offset); - __ subs(temp, input_len, ShifterOperand(pos_const)); + __ LoadFromOffset(kLoadWord, temp, input, length_offset); + __ subs(temp, temp, ShifterOperand(pos_const)); __ b(slow_path->GetEntryLabel(), LT); // Check that (length(input) - pos) >= length. @@ -1451,20 +1450,26 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { Label conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); - if (!optimizations.GetDestinationIsSource() && - (!src_pos.IsConstant() || !dest_pos.IsConstant())) { - __ cmp(src, ShifterOperand(dest)); - } // If source and destination are the same, we go to slow path if we need to do // forward copying. if (src_pos.IsConstant()) { int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); if (dest_pos.IsConstant()) { + int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + if (optimizations.GetDestinationIsSource()) { + // Checked when building locations. + DCHECK_GE(src_pos_constant, dest_pos_constant); + } else if (src_pos_constant < dest_pos_constant) { + __ cmp(src, ShifterOperand(dest)); + __ b(slow_path->GetEntryLabel(), EQ); + } + // Checked when building locations. 
DCHECK(!optimizations.GetDestinationIsSource()
|| (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
} else {
if (!optimizations.GetDestinationIsSource()) {
+ __ cmp(src, ShifterOperand(dest));
__ b(&conditions_on_positions_validated, NE);
}
__ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
@@ -1472,6 +1477,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
}
} else {
if (!optimizations.GetDestinationIsSource()) {
+ __ cmp(src, ShifterOperand(dest));
__ b(&conditions_on_positions_validated, NE);
}
if (dest_pos.IsConstant()) {
@@ -1511,7 +1517,6 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
length,
slow_path,
temp1,
- temp2,
optimizations.GetCountIsSourceLength());
// Validity checks: dest.
@@ -1521,7 +1526,6 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
length,
slow_path,
temp1,
- temp2,
optimizations.GetCountIsDestinationLength());
if (!optimizations.GetDoesNotNeedTypeCheck()) {
@@ -1599,7 +1603,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
// Compute base source address, base destination address, and end source address.
- uint32_t element_size = sizeof(int32_t);
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
if (src_pos.IsConstant()) {
int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
@@ -1625,8 +1629,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
}
// Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison, nor do any read barrier as the next uses of the destination
- // array will do it.
+ // poison/unpoison.
Label loop, done;
__ cmp(temp1, ShifterOperand(temp3));
__ b(&done, EQ);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 1685cf9c3c..1d507530aa 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -608,54 +608,66 @@ void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
__ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}
-static void CreateFPToIntPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
}
-static void GenMathRound(LocationSummary* locations,
- bool is_double,
- vixl::MacroAssembler* masm) {
- FPRegister in_reg = is_double ?
- DRegisterFrom(locations->InAt(0)) : SRegisterFrom(locations->InAt(0));
- Register out_reg = is_double ?
- XRegisterFrom(locations->Out()) : WRegisterFrom(locations->Out());
- UseScratchRegisterScope temps(masm);
- FPRegister temp1_reg = temps.AcquireSameSizeAs(in_reg);
+static void GenMathRound(HInvoke* invoke, bool is_double, vixl::MacroAssembler* masm) {
+ // Java 8 API definition for Math.round():
+ // Return the closest long or int to the argument, with ties rounding to positive infinity.
+ //
+ // There is no single instruction in ARMv8 that can support the above definition.
+ // We choose to use FCVTAS here because it has the closest semantics.
+ // FCVTAS performs rounding to nearest integer, ties away from zero.
+ // For most inputs (positive values, zero or NaN), this instruction is enough.
+ // We only need a little extra handling after FCVTAS if the input is a negative half value.
+ //
+ // The reason why we didn't choose the FCVTPS instruction here is that although it rounds
+ // toward positive infinity, it doesn't round to nearest.
+ // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
+ // If we used this instruction, more handling code would be needed for most inputs.
+ LocationSummary* l = invoke->GetLocations();
+ FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
+ FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
+ Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
+ vixl::Label done;
- // 0.5 can be encoded as an immediate, so use fmov.
- if (is_double) {
- __ Fmov(temp1_reg, static_cast<double>(0.5));
- } else {
- __ Fmov(temp1_reg, static_cast<float>(0.5));
- }
- __ Fadd(temp1_reg, in_reg, temp1_reg);
- __ Fcvtms(out_reg, temp1_reg);
+ // Round to nearest integer, ties away from zero.
+ __ Fcvtas(out_reg, in_reg);
+
+ // For positive values, zero or NaN inputs, rounding is done.
+ __ Tbz(out_reg, out_reg.size() - 1, &done);
+
+ // Handle input < 0 cases.
+ // If the input is negative but not a tie, the previous result (round to nearest) is valid.
+ // If the input is a negative tie, out_reg += 1.
+ __ Frinta(tmp_fp, in_reg);
+ __ Fsub(tmp_fp, in_reg, tmp_fp);
+ __ Fcmp(tmp_fp, 0.5);
+ __ Cinc(out_reg, out_reg, eq);
+
+ __ Bind(&done);
}
void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
- // See intrinsics.h.
- if (kRoundIsPlusPointFive) {
- CreateFPToIntPlusTempLocations(arena_, invoke);
- }
+ CreateFPToIntPlusFPTempLocations(arena_, invoke);
}
void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
- GenMathRound(invoke->GetLocations(), /* is_double */ true, GetVIXLAssembler());
+ GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler());
}
void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
- // See intrinsics.h.
- if (kRoundIsPlusPointFive) {
- CreateFPToIntPlusTempLocations(arena_, invoke);
- }
+ CreateFPToIntPlusFPTempLocations(arena_, invoke);
}
void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
- GenMathRound(invoke->GetLocations(), /* is_double */ false, GetVIXLAssembler());
+ GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler());
}
void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
CreateIntToIntLocations(arena_, invoke);
}
@@ -1840,7 +1852,6 @@ static void CheckSystemArrayCopyPosition(vixl::MacroAssembler* masm,
const Register& input,
const Location& length,
SlowPathCodeARM64* slow_path,
- const Register& input_len,
const Register& temp,
bool length_is_input_length = false) {
const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
@@ -1855,8 +1866,8 @@ static void CheckSystemArrayCopyPosition(vixl::MacroAssembler* masm,
}
} else {
// Check that length(input) >= pos.
- __ Ldr(input_len, MemOperand(input, length_offset));
- __ Subs(temp, input_len, pos_const);
+ __ Ldr(temp, MemOperand(input, length_offset));
+ __ Subs(temp, temp, pos_const);
__ B(slow_path->GetEntryLabel(), lt);
// Check that (length(input) - pos) >= length.
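Returning to the GenMathRound rewrite above, the Java 8 definition it implements can be stated as a small reference model (a sketch under the JLS wording quoted in the comment; JavaRound is a hypothetical name, and overflow clamping is omitted):

#include <cmath>
#include <cstdint>

int32_t JavaRound(float in) {
  if (std::isnan(in)) {
    return 0;  // NaN rounds to zero
  }
  float floor_val = std::floor(in);
  // A tie sits exactly half above the floor; ties round up, toward +infinity.
  return static_cast<int32_t>(floor_val) + ((in - floor_val) >= 0.5f ? 1 : 0);
}

JavaRound(2.5f) is 3 and JavaRound(-2.5f) is -2, whereas FCVTAS alone yields -3 for -2.5f; that negative-tie gap is exactly what the Frinta/Fcmp/Cinc fix-up closes. The deleted Fadd/Fcvtms path mirrored the older floor(in + 0.5f) formulation, which is subtly wrong in float arithmetic: 0.49999997f + 0.5f rounds to 1.0f, so the result rounds up even though the input is strictly below one half.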
@@ -1967,7 +1978,6 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { length, slow_path, src_curr_addr, - dst_curr_addr, false); CheckSystemArrayCopyPosition(masm, @@ -1976,7 +1986,6 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { length, slow_path, src_curr_addr, - dst_curr_addr, false); src_curr_addr = src_curr_addr.X(); @@ -2101,20 +2110,25 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { vixl::Label conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); - if (!optimizations.GetDestinationIsSource() && - (!src_pos.IsConstant() || !dest_pos.IsConstant())) { - __ Cmp(src, dest); - } // If source and destination are the same, we go to slow path if we need to do // forward copying. if (src_pos.IsConstant()) { int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); if (dest_pos.IsConstant()) { + int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + if (optimizations.GetDestinationIsSource()) { + // Checked when building locations. + DCHECK_GE(src_pos_constant, dest_pos_constant); + } else if (src_pos_constant < dest_pos_constant) { + __ Cmp(src, dest); + __ B(slow_path->GetEntryLabel(), eq); + } // Checked when building locations. DCHECK(!optimizations.GetDestinationIsSource() || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue())); } else { if (!optimizations.GetDestinationIsSource()) { + __ Cmp(src, dest); __ B(&conditions_on_positions_validated, ne); } __ Cmp(WRegisterFrom(dest_pos), src_pos_constant); @@ -2122,6 +2136,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { } } else { if (!optimizations.GetDestinationIsSource()) { + __ Cmp(src, dest); __ B(&conditions_on_positions_validated, ne); } __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()), @@ -2158,7 +2173,6 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { length, slow_path, temp1, - temp2, optimizations.GetCountIsSourceLength()); // Validity checks: dest. @@ -2168,7 +2182,6 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { length, slow_path, temp1, - temp2, optimizations.GetCountIsDestinationLength()); { // We use a block to end the scratch scope before the write barrier, thus @@ -2264,8 +2277,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { src_stop_addr); // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison, nor do any read barrier as the next uses of the destination - // array will do it. + // poison/unpoison. 
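// (The "nor do any read barrier" wording was dropped deliberately: this
// intrinsic is only generated when read barriers are disabled, as the
// kEmitCompilerReadBarrier guard in the x86 version further below makes
// explicit.)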
vixl::Label loop, done;
const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
__ Bind(&loop);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 9243f4c93f..cc4971b8f8 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -876,6 +876,151 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathCeil(HInvoke* invoke) {
GenRoundingMode(invoke->GetLocations(), kCeil, GetAssembler());
}
+static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Primitive::Type type) {
+ FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ FpuRegister half = locations->GetTemp(0).AsFpuRegister<FpuRegister>();
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+ DCHECK(type == Primitive::kPrimFloat || type == Primitive::kPrimDouble);
+
+ Mips64Label done;
+ Mips64Label finite;
+ Mips64Label add;
+
+ // if (in.isNaN) {
+ // return 0;
+ // }
+ //
+ // out = floor(in);
+ //
+ // /*
+ // * TODO: Amend this code when emulator FCSR.NAN2008=1 bug is fixed.
+ // *
+ // * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
+ // * numbers which are too large to be represented in a 32-/64-bit
+ // * signed integer will be processed by floor.X.Y to output
+ // * Integer.MIN_VALUE/Long.MIN_VALUE, and will no longer be
+ // * processed by this "if" statement.
+ // *
+ // * However, this bug in the 64-bit MIPS emulator causes the
+ // * behavior of floor.X.Y to be the same as pre-R6 implementations
+ // * of MIPS64. When that bug is fixed this logic should be amended.
+ // */
+ // if (out == MAX_VALUE) {
+ // TMP = (in < 0.0) ? 1 : 0;
+ // /*
+ // * If TMP is 1, then adding it to out will wrap its value from
+ // * MAX_VALUE to MIN_VALUE.
+ // */
+ // return out += TMP;
+ // }
+ //
+ // /*
+ // * For negative values not handled by the previous "if" statement the
+ // * test here will correctly set the value of TMP.
+ // */
+ // TMP = ((in - out) >= 0.5) ? 1 : 0;
+ // return out += TMP;
+
+ // Test for NaN.
+ if (type == Primitive::kPrimDouble) {
+ __ CmpUnD(FTMP, in, in);
+ } else {
+ __ CmpUnS(FTMP, in, in);
+ }
+
+ // Return zero for NaN.
+ __ Move(out, ZERO);
+ __ Bc1nez(FTMP, &done);
+
+ // out = floor(in);
+ if (type == Primitive::kPrimDouble) {
+ __ FloorLD(FTMP, in);
+ __ Dmfc1(out, FTMP);
+ } else {
+ __ FloorWS(FTMP, in);
+ __ Mfc1(out, FTMP);
+ }
+
+ // if (out == MAX_VALUE) { TMP = (in < 0.0) ? -1 : 0; }
+ if (type == Primitive::kPrimDouble) {
+ __ LoadConst64(AT, std::numeric_limits<int64_t>::max());
+ } else {
+ __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+ }
+ __ Bnec(AT, out, &finite);
+
+ if (type == Primitive::kPrimDouble) {
+ __ Dmtc1(ZERO, FTMP);
+ __ CmpLtD(FTMP, in, FTMP);
+ __ Dmfc1(AT, FTMP);
+ } else {
+ __ Mtc1(ZERO, FTMP);
+ __ CmpLtS(FTMP, in, FTMP);
+ __ Mfc1(AT, FTMP);
+ }
+
+ __ Bc(&add);
+
+ __ Bind(&finite);
+
+ // TMP = (0.5 <= (in - out)) ? -1 : 0;
+ if (type == Primitive::kPrimDouble) {
+ __ Cvtdl(FTMP, FTMP); // Convert output of floor.l.d back to "double".
+ __ LoadConst64(AT, bit_cast<int64_t, double>(0.5));
+ __ SubD(FTMP, in, FTMP);
+ __ Dmtc1(AT, half);
+ __ CmpLeD(FTMP, half, FTMP);
+ __ Dmfc1(AT, FTMP);
+ } else {
+ __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float".
+ __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+ __ SubS(FTMP, in, FTMP);
+ __ Mtc1(AT, half);
+ __ CmpLeS(FTMP, half, FTMP);
+ __ Mfc1(AT, FTMP);
+ }
+
+ __ Bind(&add);
+
+ // Return out -= TMP.
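// Worked values for the sequence above (illustrative inputs):
//   in = 2.5  : floor gives 2,  in - out = 0.5 -> TMP = -1, out - TMP = 3
//   in = -2.5 : floor gives -3, in - out = 0.5 -> TMP = -1, out - TMP = -2
//   in = 2.4  : floor gives 2,  in - out = 0.4 -> TMP = 0,  out stays 2
//   in = NaN  : the CmpUn test above already returned 0.
// Both ties land on the Java "half rounds toward positive infinity" result.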
+ if (type == Primitive::kPrimDouble) { + __ Dsubu(out, out, AT); + } else { + __ Subu(out, out, AT); + } + + __ Bind(&done); +} + +// int java.lang.Math.round(float) +void IntrinsicLocationsBuilderMIPS64::VisitMathRoundFloat(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathRoundFloat(HInvoke* invoke) { + GenRound(invoke->GetLocations(), GetAssembler(), Primitive::kPrimFloat); +} + +// long java.lang.Math.round(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathRoundDouble(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathRoundDouble(HInvoke* invoke) { + GenRound(invoke->GetLocations(), GetAssembler(), Primitive::kPrimDouble); +} + // byte libcore.io.Memory.peekByte(long address) void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekByte(HInvoke* invoke) { CreateIntToIntLocations(arena_, invoke); @@ -1734,9 +1879,6 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } -UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundFloat) - UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(MIPS64, StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopyChar) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 031cd1313c..812bdf550e 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -1070,30 +1070,45 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) { static void CheckPosition(X86Assembler* assembler, Location pos, Register input, - Register length, + Location length, SlowPathCode* slow_path, - Register input_len, - Register temp) { - // Where is the length in the String? + Register temp, + bool length_is_input_length = false) { + // Where is the length in the Array? const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); if (pos.IsConstant()) { int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); if (pos_const == 0) { - // Check that length(input) >= length. - __ cmpl(Address(input, length_offset), length); - __ j(kLess, slow_path->GetEntryLabel()); + if (!length_is_input_length) { + // Check that length(input) >= length. + if (length.IsConstant()) { + __ cmpl(Address(input, length_offset), + Immediate(length.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ cmpl(Address(input, length_offset), length.AsRegister<Register>()); + } + __ j(kLess, slow_path->GetEntryLabel()); + } } else { // Check that length(input) >= pos. - __ movl(input_len, Address(input, length_offset)); - __ cmpl(input_len, Immediate(pos_const)); + __ movl(temp, Address(input, length_offset)); + __ subl(temp, Immediate(pos_const)); __ j(kLess, slow_path->GetEntryLabel()); // Check that (length(input) - pos) >= length. 
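// In C++ terms, the pos/length checks emitted by these helpers reduce to the
// following predicate (a sketch with a hypothetical name, not ART code);
// pos >= 0 and count >= 0 are established separately:
#include <cstdint>
bool CopyRangeInBounds(int32_t input_length, int32_t pos, int32_t count) {
  int32_t remaining = input_length - pos;   // the subl above
  return remaining >= 0 && remaining >= count;
}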
- __ leal(temp, Address(input_len, -pos_const)); - __ cmpl(temp, length); + if (length.IsConstant()) { + __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ cmpl(temp, length.AsRegister<Register>()); + } __ j(kLess, slow_path->GetEntryLabel()); } + } else if (length_is_input_length) { + // The only way the copy can succeed is if pos is zero. + Register pos_reg = pos.AsRegister<Register>(); + __ testl(pos_reg, pos_reg); + __ j(kNotEqual, slow_path->GetEntryLabel()); } else { // Check that pos >= 0. Register pos_reg = pos.AsRegister<Register>(); @@ -1107,7 +1122,11 @@ static void CheckPosition(X86Assembler* assembler, // Check that (length(input) - pos) >= length. __ movl(temp, Address(input, length_offset)); __ subl(temp, pos_reg); - __ cmpl(temp, length); + if (length.IsConstant()) { + __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ cmpl(temp, length.AsRegister<Register>()); + } __ j(kLess, slow_path->GetEntryLabel()); } } @@ -1159,11 +1178,11 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) { __ movl(count, length.AsRegister<Register>()); } - // Validity checks: source. - CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base); + // Validity checks: source. Use src_base as a temporary register. + CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base); - // Validity checks: dest. - CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base); + // Validity checks: dest. Use src_base as a temporary register. + CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base); // Okay, everything checks out. Finally time to do the copy. // Check assumption that sizeof(Char) is 2 (used in scaling below). @@ -2646,8 +2665,274 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) { + return instruction->InputAt(input0) == instruction->InputAt(input1); +} + +void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). + if (kEmitCompilerReadBarrier) { + return; + } + + CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); + if (invoke->GetLocations() != nullptr) { + // Need a byte register for marking. + invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX)); + + static constexpr size_t kSrc = 0; + static constexpr size_t kSrcPos = 1; + static constexpr size_t kDest = 2; + static constexpr size_t kDestPos = 3; + static constexpr size_t kLength = 4; + + if (!invoke->InputAt(kSrcPos)->IsIntConstant() && + !invoke->InputAt(kDestPos)->IsIntConstant() && + !invoke->InputAt(kLength)->IsIntConstant()) { + if (!IsSameInput(invoke, kSrcPos, kDestPos) && + !IsSameInput(invoke, kSrcPos, kLength) && + !IsSameInput(invoke, kDestPos, kLength) && + !IsSameInput(invoke, kSrc, kDest)) { + // Not enough registers, make the length also take a stack slot. + invoke->GetLocations()->SetInAt(kLength, Location::Any()); + } + } + } +} + +void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). 
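// (Context for the Location::Any() fallback above: ia32 has only eight
// general-purpose registers and ESP is not allocatable, so five distinct
// non-constant inputs plus the temps cannot all stay in registers; the
// length may therefore live on the stack and is reloaded into a temp below
// when needed.)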
+ DCHECK(!kEmitCompilerReadBarrier); + + X86Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + + Register src = locations->InAt(0).AsRegister<Register>(); + Location src_pos = locations->InAt(1); + Register dest = locations->InAt(2).AsRegister<Register>(); + Location dest_pos = locations->InAt(3); + Location length = locations->InAt(4); + Register temp1 = locations->GetTemp(0).AsRegister<Register>(); + Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + + SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + codegen_->AddSlowPath(slow_path); + + NearLabel conditions_on_positions_validated; + SystemArrayCopyOptimizations optimizations(invoke); + + // If source and destination are the same, we go to slow path if we need to do + // forward copying. + if (src_pos.IsConstant()) { + int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); + if (dest_pos.IsConstant()) { + int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + if (optimizations.GetDestinationIsSource()) { + // Checked when building locations. + DCHECK_GE(src_pos_constant, dest_pos_constant); + } else if (src_pos_constant < dest_pos_constant) { + __ cmpl(src, dest); + __ j(kEqual, slow_path->GetEntryLabel()); + } + } else { + if (!optimizations.GetDestinationIsSource()) { + __ cmpl(src, dest); + __ j(kNotEqual, &conditions_on_positions_validated); + } + __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant)); + __ j(kGreater, slow_path->GetEntryLabel()); + } + } else { + if (!optimizations.GetDestinationIsSource()) { + __ cmpl(src, dest); + __ j(kNotEqual, &conditions_on_positions_validated); + } + if (dest_pos.IsConstant()) { + int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant)); + __ j(kLess, slow_path->GetEntryLabel()); + } else { + __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>()); + __ j(kLess, slow_path->GetEntryLabel()); + } + } + + __ Bind(&conditions_on_positions_validated); + + if (!optimizations.GetSourceIsNotNull()) { + // Bail out if the source is null. + __ testl(src, src); + __ j(kEqual, slow_path->GetEntryLabel()); + } + + if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { + // Bail out if the destination is null. + __ testl(dest, dest); + __ j(kEqual, slow_path->GetEntryLabel()); + } + + Register temp3 = locations->GetTemp(2).AsRegister<Register>(); + if (length.IsStackSlot()) { + __ movl(temp3, Address(ESP, length.GetStackIndex())); + length = Location::RegisterLocation(temp3); + } + + // If the length is negative, bail out. + // We have already checked in the LocationsBuilder for the constant case. + if (!length.IsConstant() && + !optimizations.GetCountIsSourceLength() && + !optimizations.GetCountIsDestinationLength()) { + __ testl(length.AsRegister<Register>(), length.AsRegister<Register>()); + __ j(kLess, slow_path->GetEntryLabel()); + } + + // Validity checks: source. 
+ CheckPosition(assembler, + src_pos, + src, + length, + slow_path, + temp1, + optimizations.GetCountIsSourceLength()); + + // Validity checks: dest. + CheckPosition(assembler, + dest_pos, + dest, + length, + slow_path, + temp1, + optimizations.GetCountIsDestinationLength()); + + if (!optimizations.GetDoesNotNeedTypeCheck()) { + // Check whether all elements of the source array are assignable to the component + // type of the destination array. We do two checks: the classes are the same, + // or the destination is Object[]. If none of these checks succeed, we go to the + // slow path. + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // /* HeapReference<Class> */ temp1 = temp1->klass_ + __ movl(temp1, Address(src, class_offset)); + __ MaybeUnpoisonHeapReference(temp1); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ movl(temp1, Address(temp1, component_offset)); + __ testl(temp1, temp1); + __ j(kEqual, slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp1); + __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + } + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // /* HeapReference<Class> */ temp1 = temp1->klass_ + __ movl(temp1, Address(dest, class_offset)); + __ MaybeUnpoisonHeapReference(temp1); + // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + __ movl(temp2, Address(temp1, component_offset)); + __ testl(temp2, temp2); + __ j(kEqual, slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp2); + __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + // Re-poison the heap reference to make the compare instruction below + // compare two poisoned references. + __ PoisonHeapReference(temp1); + } else { + // /* HeapReference<Class> */ temp1 = temp1->klass_ + __ movl(temp1, Address(dest, class_offset)); + } + + // Note: if poisoning is on, we are here comparing two poisoned references. + __ cmpl(temp1, Address(src, class_offset)); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + NearLabel do_copy; + __ j(kEqual, &do_copy); + __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ movl(temp1, Address(temp1, component_offset)); + __ MaybeUnpoisonHeapReference(temp1); + __ cmpl(Address(temp1, super_offset), Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ j(kNotEqual, slow_path->GetEntryLabel()); + } + } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { + DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = src->klass_ + __ movl(temp1, Address(src, class_offset)); + __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ movl(temp1, Address(temp1, component_offset)); + __ testl(temp1, temp1); + __ j(kEqual, slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp1); + __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + } + + // Compute base source address, base destination address, and end source address. 
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + DCHECK_EQ(element_size, 4); + uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + if (src_pos.IsConstant()) { + int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp1, Address(src, element_size * constant + offset)); + } else { + __ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); + } + + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp2, Address(dest, element_size * constant + offset)); + } else { + __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); + } + + if (length.IsConstant()) { + int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp3, Address(temp1, element_size * constant)); + } else { + __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0)); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + NearLabel loop, done; + __ cmpl(temp1, temp3); + __ j(kEqual, &done); + __ Bind(&loop); + __ pushl(Address(temp1, 0)); + __ cfi().AdjustCFAOffset(4); + __ popl(Address(temp2, 0)); + __ cfi().AdjustCFAOffset(-4); + __ addl(temp1, Immediate(element_size)); + __ addl(temp2, Immediate(element_size)); + __ cmpl(temp1, temp3); + __ j(kNotEqual, &loop); + __ Bind(&done); + + // We only need one card marking on the destination array. + codegen_->MarkGCCard(temp1, + temp2, + dest, + Register(kNoRegister), + /* value_can_be_null */ false); + + __ Bind(slow_path->GetExitLabel()); +} + UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(X86, SystemArrayCopy) UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index c5b44d4f5c..891aaf5ff9 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -922,7 +922,6 @@ static void CheckPosition(X86_64Assembler* assembler, CpuRegister input, Location length, SlowPathCode* slow_path, - CpuRegister input_len, CpuRegister temp, bool length_is_input_length = false) { // Where is the length in the Array? @@ -943,12 +942,11 @@ static void CheckPosition(X86_64Assembler* assembler, } } else { // Check that length(input) >= pos. - __ movl(input_len, Address(input, length_offset)); - __ cmpl(input_len, Immediate(pos_const)); + __ movl(temp, Address(input, length_offset)); + __ subl(temp, Immediate(pos_const)); __ j(kLess, slow_path->GetEntryLabel()); // Check that (length(input) - pos) >= length. - __ leal(temp, Address(input_len, -pos_const)); if (length.IsConstant()) { __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue())); } else { @@ -1023,11 +1021,11 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { __ j(kLess, slow_path->GetEntryLabel()); } - // Validity checks: source. - CheckPosition(assembler, src_pos, src, length, slow_path, src_base, dest_base); + // Validity checks: source. Use src_base as a temporary register. + CheckPosition(assembler, src_pos, src, length, slow_path, src_base); - // Validity checks: dest. - CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base, dest_base); + // Validity checks: dest. Use src_base as a temporary register. 
+ CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base); // We need the count in RCX. if (length.IsConstant()) { @@ -1103,20 +1101,22 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { NearLabel conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); - if (!optimizations.GetDestinationIsSource() && - (!src_pos.IsConstant() || !dest_pos.IsConstant())) { - __ cmpl(src, dest); - } // If source and destination are the same, we go to slow path if we need to do // forward copying. if (src_pos.IsConstant()) { int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); if (dest_pos.IsConstant()) { - // Checked when building locations. - DCHECK(!optimizations.GetDestinationIsSource() - || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue())); + int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + if (optimizations.GetDestinationIsSource()) { + // Checked when building locations. + DCHECK_GE(src_pos_constant, dest_pos_constant); + } else if (src_pos_constant < dest_pos_constant) { + __ cmpl(src, dest); + __ j(kEqual, slow_path->GetEntryLabel()); + } } else { if (!optimizations.GetDestinationIsSource()) { + __ cmpl(src, dest); __ j(kNotEqual, &conditions_on_positions_validated); } __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant)); @@ -1124,6 +1124,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } } else { if (!optimizations.GetDestinationIsSource()) { + __ cmpl(src, dest); __ j(kNotEqual, &conditions_on_positions_validated); } if (dest_pos.IsConstant()) { @@ -1166,7 +1167,6 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { length, slow_path, temp1, - temp2, optimizations.GetCountIsSourceLength()); // Validity checks: dest. @@ -1176,7 +1176,6 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { length, slow_path, temp1, - temp2, optimizations.GetCountIsDestinationLength()); if (!optimizations.GetDoesNotNeedTypeCheck()) { @@ -1255,7 +1254,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // Compute base source address, base destination address, and end source address. - uint32_t element_size = sizeof(int32_t); + int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); if (src_pos.IsConstant()) { int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); @@ -1279,8 +1278,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison, nor do any read barrier as the next uses of the destination - // array will do it. + // poison/unpoison. 
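// In plain C++ the loop below is simply the following (a sketch with a
// hypothetical name; the generated code tracks the current source, source
// end, and destination addresses in temporaries):
#include <cstdint>
void RawCopy(uint32_t* src_cur, uint32_t* src_end, uint32_t* dst_cur) {
  // Raw word-by-word copy of 32-bit heap references; no poisoning and no
  // read barriers, matching the comment above.
  while (src_cur != src_end) {
    *dst_cur++ = *src_cur++;
  }
}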
NearLabel loop, done; __ cmpl(temp1, temp3); __ j(kEqual, &done); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index c2c212b66f..d557f42968 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -754,7 +754,7 @@ bool HBasicBlock::Dominates(HBasicBlock* other) const { } static void UpdateInputsUsers(HInstruction* instruction) { - auto&& inputs = instruction->GetInputs(); + HInputsRef inputs = instruction->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { inputs[i]->AddUseAt(instruction, i); } @@ -1312,8 +1312,8 @@ bool HInstruction::Equals(const HInstruction* other) const { DCHECK_EQ(GetKind(), other->GetKind()); if (!InstructionDataEquals(other)) return false; if (GetType() != other->GetType()) return false; - auto&& inputs = GetInputs(); - auto&& other_inputs = other->GetInputs(); + HConstInputsRef inputs = GetInputs(); + HConstInputsRef other_inputs = other->GetInputs(); if (inputs.size() != other_inputs.size()) return false; for (size_t i = 0; i != inputs.size(); ++i) { if (inputs[i] != other_inputs[i]) return false; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 29df7c8ab8..0f0ef26ea9 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1307,7 +1307,13 @@ class HLoopInformationOutwardIterator : public ValueObject { M(Arm64IntermediateAddress, Instruction) #endif +#ifndef ART_ENABLE_CODEGEN_mips #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) +#else +#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ + M(MipsComputeBaseMethodAddress, Instruction) \ + M(MipsDexCacheArraysBase, Instruction) +#endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M) @@ -1415,6 +1421,21 @@ class HUserRecord : public ValueObject { typename HUseList<T>::iterator before_use_node_; }; +// Helper class that extracts the input instruction from HUserRecord<HInstruction*>. +// This is used for HInstruction::GetInputs() to return a container wrapper providing +// HInstruction* values even though the underlying container has HUserRecord<>s. +struct HInputExtractor { + HInstruction* operator()(HUserRecord<HInstruction*>& record) const { + return record.GetInstruction(); + } + const HInstruction* operator()(const HUserRecord<HInstruction*>& record) const { + return record.GetInstruction(); + } +}; + +using HInputsRef = TransformArrayRef<HUserRecord<HInstruction*>, HInputExtractor>; +using HConstInputsRef = TransformArrayRef<const HUserRecord<HInstruction*>, HInputExtractor>; + /** * Side-effects representation. 
* @@ -1804,20 +1825,12 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { const_cast<HInstruction*>(this)->GetInputRecords()); } - auto GetInputs() { - return MakeTransformArrayRef( - GetInputRecords(), - [](HUserRecord<HInstruction*>& record) -> HInstruction* { - return record.GetInstruction(); - }); + HInputsRef GetInputs() { + return MakeTransformArrayRef(GetInputRecords(), HInputExtractor()); } - auto GetInputs() const { - return MakeTransformArrayRef( - GetInputRecords(), - [](const HUserRecord<HInstruction*>& record) -> const HInstruction* { - return record.GetInstruction(); - }); + HConstInputsRef GetInputs() const { + return MakeTransformArrayRef(GetInputRecords(), HInputExtractor()); } size_t InputCount() const { return GetInputRecords().size(); } @@ -2408,7 +2421,7 @@ class HPhi FINAL : public HInstruction { bool IsDead() const { return !IsLive(); } bool IsLive() const { return GetPackedFlag<kFlagIsLive>(); } - bool IsVRegEquivalentOf(HInstruction* other) const { + bool IsVRegEquivalentOf(const HInstruction* other) const { return other != nullptr && other->IsPhi() && other->AsPhi()->GetBlock() == GetBlock() @@ -3160,7 +3173,7 @@ class HEqual FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { return x == y; } + template <typename T> static bool Compute(T x, T y) { return x == y; } DISALLOW_COPY_AND_ASSIGN(HEqual); }; @@ -3203,7 +3216,7 @@ class HNotEqual FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { return x != y; } + template <typename T> static bool Compute(T x, T y) { return x != y; } DISALLOW_COPY_AND_ASSIGN(HNotEqual); }; @@ -3240,7 +3253,7 @@ class HLessThan FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { return x < y; } + template <typename T> static bool Compute(T x, T y) { return x < y; } DISALLOW_COPY_AND_ASSIGN(HLessThan); }; @@ -3277,7 +3290,7 @@ class HLessThanOrEqual FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { return x <= y; } + template <typename T> static bool Compute(T x, T y) { return x <= y; } DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual); }; @@ -3314,7 +3327,7 @@ class HGreaterThan FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { return x > y; } + template <typename T> static bool Compute(T x, T y) { return x > y; } DISALLOW_COPY_AND_ASSIGN(HGreaterThan); }; @@ -3351,7 +3364,7 @@ class HGreaterThanOrEqual FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { return x >= y; } + template <typename T> static bool Compute(T x, T y) { return x >= y; } DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual); }; @@ -3389,7 +3402,7 @@ class HBelow FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { + template <typename T> static bool Compute(T x, T y) { return MakeUnsigned(x) < MakeUnsigned(y); } @@ -3429,7 +3442,7 @@ class HBelowOrEqual FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { + template <typename T> static bool Compute(T x, T y) { return MakeUnsigned(x) <= MakeUnsigned(y); } @@ -3469,7 +3482,7 @@ class HAbove FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { + template <typename T> static bool Compute(T x, T y) { return MakeUnsigned(x) > MakeUnsigned(y); } @@ -3509,7 +3522,7 @@ class HAboveOrEqual FINAL : public HCondition { } private: - template <typename T> 
bool Compute(T x, T y) const { + template <typename T> static bool Compute(T x, T y) { return MakeUnsigned(x) >= MakeUnsigned(y); } @@ -4182,7 +4195,7 @@ class HNeg FINAL : public HUnaryOperation { DCHECK_EQ(result_type, Primitive::PrimitiveKind(input->GetType())); } - template <typename T> T Compute(T x) const { return -x; } + template <typename T> static T Compute(T x) { return -x; } HConstant* Evaluate(HIntConstant* x) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); @@ -4252,7 +4265,7 @@ class HAdd FINAL : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - template <typename T> T Compute(T x, T y) const { return x + y; } + template <typename T> static T Compute(T x, T y) { return x + y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( @@ -4285,7 +4298,7 @@ class HSub FINAL : public HBinaryOperation { uint32_t dex_pc = kNoDexPc) : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} - template <typename T> T Compute(T x, T y) const { return x - y; } + template <typename T> static T Compute(T x, T y) { return x - y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( @@ -4320,7 +4333,7 @@ class HMul FINAL : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - template <typename T> T Compute(T x, T y) const { return x * y; } + template <typename T> static T Compute(T x, T y) { return x * y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( @@ -4486,7 +4499,7 @@ class HShl FINAL : public HBinaryOperation { } template <typename T> - T Compute(T value, int32_t distance, int32_t max_shift_distance) const { + static T Compute(T value, int32_t distance, int32_t max_shift_distance) { return value << (distance & max_shift_distance); } @@ -4532,7 +4545,7 @@ class HShr FINAL : public HBinaryOperation { } template <typename T> - T Compute(T value, int32_t distance, int32_t max_shift_distance) const { + static T Compute(T value, int32_t distance, int32_t max_shift_distance) { return value >> (distance & max_shift_distance); } @@ -4578,7 +4591,7 @@ class HUShr FINAL : public HBinaryOperation { } template <typename T> - T Compute(T value, int32_t distance, int32_t max_shift_distance) const { + static T Compute(T value, int32_t distance, int32_t max_shift_distance) { typedef typename std::make_unsigned<T>::type V; V ux = static_cast<V>(value); return static_cast<T>(ux >> (distance & max_shift_distance)); @@ -4624,7 +4637,7 @@ class HAnd FINAL : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - template <typename T> T Compute(T x, T y) const { return x & y; } + template <typename T> static T Compute(T x, T y) { return x & y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( @@ -4661,7 +4674,7 @@ class HOr FINAL : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - template <typename T> T Compute(T x, T y) const { return x | y; } + template <typename T> static T Compute(T x, T y) { return x | y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( @@ -4698,7 +4711,7 @@ class HXor FINAL : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - template 
<typename T> T Compute(T x, T y) const { return x ^ y; } + template <typename T> static T Compute(T x, T y) { return x ^ y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( @@ -4734,7 +4747,7 @@ class HRor FINAL : public HBinaryOperation { } template <typename T> - T Compute(T value, int32_t distance, int32_t max_shift_value) const { + static T Compute(T value, int32_t distance, int32_t max_shift_value) { typedef typename std::make_unsigned<T>::type V; V ux = static_cast<V>(value); if ((distance & max_shift_value) == 0) { @@ -4830,7 +4843,7 @@ class HNot FINAL : public HUnaryOperation { return true; } - template <typename T> T Compute(T x) const { return ~x; } + template <typename T> static T Compute(T x) { return ~x; } HConstant* Evaluate(HIntConstant* x) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); @@ -4863,7 +4876,7 @@ class HBooleanNot FINAL : public HUnaryOperation { return true; } - template <typename T> bool Compute(T x) const { + template <typename T> static bool Compute(T x) { DCHECK(IsUint<1>(x)) << x; return !x; } @@ -5029,7 +5042,7 @@ class HInstanceFieldGet FINAL : public HExpression<1> { } bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { - return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize; + return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value()); } size_t ComputeHashCode() const OVERRIDE { @@ -5076,7 +5089,7 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { } bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { - return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize; + return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value()); } const FieldInfo& GetFieldInfo() const { return field_info_; } @@ -6544,6 +6557,9 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { #ifdef ART_ENABLE_CODEGEN_arm64 #include "nodes_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_mips +#include "nodes_mips.h" +#endif #ifdef ART_ENABLE_CODEGEN_x86 #include "nodes_x86.h" #endif diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h new file mode 100644 index 0000000000..de77245e17 --- /dev/null +++ b/compiler/optimizing/nodes_mips.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ +#define ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ + +namespace art { + +// Compute the address of the method for MIPS Constant area support. +class HMipsComputeBaseMethodAddress : public HExpression<0> { + public: + // Treat the value as an int32_t, but it is really a 32 bit native pointer. 
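An aside on the two nodes this new header introduces (the base-method-address node whose declaration continues below, and HMipsDexCacheArraysBase after it): the "aligning offset -32768 with the offset of the first used element" comment repeated in UpdateElementOffset below reflects the signed 16-bit displacement of a MIPS lw instruction. Anchoring the base register 0x8000 bytes past the first used element lets a single lw reach every element within 64 KiB of it. A sketch of the arithmetic, with hypothetical names and the bias value taken from this patch:

#include <cstdint>

constexpr uint32_t kLwBias = 0x8000;  // dex cache array load bias from this patch

// base = arrays_start + first_used_offset + kLwBias, so the displacement of
// any element is element_offset - first_used_offset - kLwBias. That fits the
// signed 16-bit immediate whenever the element lies within 64 KiB of the
// first used one: the first element gets -32768, the farthest gets +32767.
int32_t LwDisplacement(uint32_t element_offset, uint32_t first_used_offset) {
  return static_cast<int32_t>(element_offset - first_used_offset) -
         static_cast<int32_t>(kLwBias);
}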
+ HMipsComputeBaseMethodAddress() + : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc) {} + + bool CanBeMoved() const OVERRIDE { return true; } + + DECLARE_INSTRUCTION(MipsComputeBaseMethodAddress); + + private: + DISALLOW_COPY_AND_ASSIGN(HMipsComputeBaseMethodAddress); +}; + +class HMipsDexCacheArraysBase : public HExpression<0> { + public: + explicit HMipsDexCacheArraysBase(const DexFile& dex_file) + : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc), + dex_file_(&dex_file), + element_offset_(static_cast<size_t>(-1)) { } + + bool CanBeMoved() const OVERRIDE { return true; } + + void UpdateElementOffset(size_t element_offset) { + // We'll maximize the range of a single load instruction for dex cache array accesses + // by aligning offset -32768 with the offset of the first used element. + element_offset_ = std::min(element_offset_, element_offset); + } + + const DexFile& GetDexFile() const { + return *dex_file_; + } + + size_t GetElementOffset() const { + return element_offset_; + } + + DECLARE_INSTRUCTION(MipsDexCacheArraysBase); + + private: + const DexFile* dex_file_; + size_t element_offset_; + + DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 764160adce..05eb06333e 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -32,21 +32,21 @@ static constexpr uint8_t expected_cfi_kThumb2[] = { // 0x00000012: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kArm64[] = { - 0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0xD7, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9, - 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0xD7, 0x42, 0xA9, - 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, + 0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9, + 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0x17, 0x40, 0xF9, + 0xF5, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, }; static constexpr uint8_t expected_cfi_kArm64[] = { - 0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, + 0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x44, 0x95, 0x04, 0x9E, 0x02, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49, - 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, + 0x44, 0xD4, 0x44, 0xD5, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; // 0x00000000: str x0, [sp, #-64]! 
// 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: stp x20, x21, [sp, #40] +// 0x00000004: str x20, [sp, #40] // 0x00000008: .cfi_offset: r20 at cfa-24 -// 0x00000008: .cfi_offset: r21 at cfa-16 -// 0x00000008: str lr, [sp, #56] +// 0x00000008: stp x21, lr, [sp, #48] +// 0x0000000c: .cfi_offset: r21 at cfa-16 // 0x0000000c: .cfi_offset: r30 at cfa-8 // 0x0000000c: stp d8, d9, [sp, #24] // 0x00000010: .cfi_offset_extended: r72 at cfa-40 @@ -55,10 +55,10 @@ static constexpr uint8_t expected_cfi_kArm64[] = { // 0x00000010: ldp d8, d9, [sp, #24] // 0x00000014: .cfi_restore_extended: r72 // 0x00000014: .cfi_restore_extended: r73 -// 0x00000014: ldp x20, x21, [sp, #40] +// 0x00000014: ldr x20, [sp, #40] // 0x00000018: .cfi_restore: r20 -// 0x00000018: .cfi_restore: r21 -// 0x00000018: ldr lr, [sp, #56] +// 0x00000018: ldp x21, lr, [sp, #48] +// 0x0000001c: .cfi_restore: r21 // 0x0000001c: .cfi_restore: r30 // 0x0000001c: add sp, sp, #0x40 (64) // 0x00000020: .cfi_def_cfa_offset: 0 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index c9a4bfe987..d703b0f94f 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -28,6 +28,11 @@ #include "instruction_simplifier_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_mips +#include "dex_cache_array_fixups_mips.h" +#include "pc_relative_fixups_mips.h" +#endif + #ifdef ART_ENABLE_CODEGEN_x86 #include "pc_relative_fixups_x86.h" #endif @@ -462,6 +467,20 @@ static void RunArchOptimizations(InstructionSet instruction_set, break; } #endif +#ifdef ART_ENABLE_CODEGEN_mips + case kMips: { + mips::PcRelativeFixups* pc_relative_fixups = + new (arena) mips::PcRelativeFixups(graph, codegen, stats); + mips::DexCacheArrayFixups* dex_cache_array_fixups = + new (arena) mips::DexCacheArrayFixups(graph, stats); + HOptimization* mips_optimizations[] = { + pc_relative_fixups, + dex_cache_array_fixups + }; + RunOptimizations(mips_optimizations, arraysize(mips_optimizations), pass_observer); + break; + } +#endif #ifdef ART_ENABLE_CODEGEN_x86 case kX86: { x86::PcRelativeFixups* pc_relative_fixups = diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc new file mode 100644 index 0000000000..ba405cdb69 --- /dev/null +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pc_relative_fixups_mips.h" +#include "code_generator_mips.h" +#include "intrinsics_mips.h" + +namespace art { +namespace mips { + +/** + * Finds instructions that need the constant area base as an input. 
+ */ +class PCRelativeHandlerVisitor : public HGraphVisitor { + public: + PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen) + : HGraphVisitor(graph), + codegen_(down_cast<CodeGeneratorMIPS*>(codegen)), + base_(nullptr) {} + + void MoveBaseIfNeeded() { + if (base_ != nullptr) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + base_->MoveBeforeFirstUserAndOutOfLoops(); + } + } + + private: + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void InitializePCRelativeBasePointer() { + // Ensure we only initialize the pointer once. + if (base_ != nullptr) { + return; + } + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBaseIfNeeded(). + base_ = new (GetGraph()->GetArena()) HMipsComputeBaseMethodAddress(); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction()); + DCHECK(base_ != nullptr); + } + + void HandleInvoke(HInvoke* invoke) { + // If this is an invoke-static/-direct with PC-relative dex cache array + // addressing, we need the PC-relative address base. + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + if (invoke_static_or_direct != nullptr) { + HInvokeStaticOrDirect::MethodLoadKind method_load_kind = + invoke_static_or_direct->GetMethodLoadKind(); + HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = + invoke_static_or_direct->GetCodePtrLocation(); + + bool has_extra_input = + (method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) || + (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup); + + // We can't add a pointer to the constant area if we already have a current + // method pointer. This may arise when sharpening doesn't remove the current + // method pointer from the invoke. + if (invoke_static_or_direct->HasCurrentMethodInput()) { + DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache()); + CHECK(!has_extra_input); // TODO: review this. + return; + } + + if (has_extra_input && !WillHaveCallFreeIntrinsicsCodeGen(invoke)) { + InitializePCRelativeBasePointer(); + // Add the extra parameter base_. + invoke_static_or_direct->AddSpecialInput(base_); + } + } + } + + bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) { + if (invoke->GetIntrinsic() != Intrinsics::kNone) { + // This invoke may have intrinsic code generation defined. However, we must + // now also determine if this code generation is truly there and call-free + // (not unimplemented, no bail on instruction features, or call on slow path). + // This is done by actually calling the locations builder on the instruction + // and clearing out the locations once result is known. We assume this + // call only has creating locations as side effects! + IntrinsicLocationsBuilderMIPS builder(codegen_); + bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall(); + invoke->SetLocations(nullptr); + return success; + } + return false; + } + + CodeGeneratorMIPS* codegen_; + + // The generated HMipsComputeBaseMethodAddress in the entry block needed as an + // input to the HMipsLoadFromConstantTable instructions. 
+ HMipsComputeBaseMethodAddress* base_; +}; + +void PcRelativeFixups::Run() { + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_); + if (mips_codegen->GetInstructionSetFeatures().IsR6()) { + // Do nothing for R6 because it has PC-relative addressing. + // TODO: review. Move this check into RunArchOptimizations()? + return; + } + if (graph_->HasIrreducibleLoops()) { + // Do not run this optimization, as irreducible loops do not work with an instruction + // that can be live-in at the irreducible loop header. + return; + } + PCRelativeHandlerVisitor visitor(graph_, codegen_); + visitor.VisitInsertionOrder(); + visitor.MoveBaseIfNeeded(); +} + +} // namespace mips +} // namespace art diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h new file mode 100644 index 0000000000..1e8b071bb3 --- /dev/null +++ b/compiler/optimizing/pc_relative_fixups_mips.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ +#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { + +class CodeGenerator; + +namespace mips { + +class PcRelativeFixups : public HOptimization { + public: + PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, "pc_relative_fixups_mips", stats), + codegen_(codegen) {} + + void Run() OVERRIDE; + + private: + CodeGenerator* codegen_; +}; + +} // namespace mips +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index 93116f8bab..921f3dfff6 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -211,7 +211,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } // Ensure that we can load FP arguments from the constant area. - auto&& inputs = invoke->GetInputs(); + HInputsRef inputs = invoke->GetInputs(); for (size_t i = 0; i < inputs.size(); i++) { HConstant* input = inputs[i]->AsConstant(); if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) { diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 696b8c6859..8fb539661f 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -146,7 +146,11 @@ void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) { instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0); // The allocation entry point that deals with access checks does not work with inlined // methods, so we need to check whether this allocation comes from an inlined method. 
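The condition in the next hunk combines three requirements before the HLoadClass may be folded into the HNewInstance. Spelled out as a sketch (a hypothetical helper, not ART code):

// All three must hold: the class load has no other users, the allocation is
// not inside an inlined frame (the access-checking entry point cannot handle
// those), and it is legal to move the clinit-check responsibility onto the
// allocation, judged by the same CanMoveClinitCheck rules used for clinit
// check motion. Skipping the third check could reorder class initialization
// relative to other observable effects.
bool CanFoldLoadClassIntoAllocation(bool has_only_one_use,
                                    bool from_inlined_invoke,
                                    bool can_move_clinit_check) {
  return has_only_one_use && !from_inlined_invoke && can_move_clinit_check;
}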
- if (has_only_one_use && !instruction->GetEnvironment()->IsFromInlinedInvoke()) { + // We also need to make the same check as for moving clinit check, whether the HLoadClass + // has the clinit check responsibility or not (HLoadClass can throw anyway). + if (has_only_one_use && + !instruction->GetEnvironment()->IsFromInlinedInvoke() && + CanMoveClinitCheck(load_class, instruction)) { // We can remove the load class from the graph. If it needed access checks, we delegate // the access check to the allocation. if (load_class->NeedsAccessCheck()) { @@ -203,7 +207,8 @@ bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, HInstruction* user) const { // Determine if input and user come from the same dex instruction, so that we can move // the clinit check responsibility from one to the other, i.e. from HClinitCheck (user) - // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user). + // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user), + // or from HLoadClass (input) to HNewInstance (user). // Start with a quick dex pc check. if (user->GetDexPc() != input->GetDexPc()) { diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h index f9bef6809f..5891350894 100644 --- a/compiler/optimizing/pretty_printer.h +++ b/compiler/optimizing/pretty_printer.h @@ -39,7 +39,7 @@ class HPrettyPrinter : public HGraphVisitor { } void PrintPostInstruction(HInstruction* instruction) { - auto&& inputs = instruction->GetInputs(); + HConstInputsRef inputs = instruction->GetInputs(); if (!inputs.empty()) { PrintString("("); bool first = true; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 3dfd7282cd..965d5ee4f9 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -816,10 +816,10 @@ void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { DCHECK(instr->IsLive()); - auto&& inputs = instr->GetInputs(); + HInputsRef inputs = instr->GetInputs(); size_t first_input_index_not_null = 0; while (first_input_index_not_null < inputs.size() && - inputs[first_input_index_not_null]->IsNullConstant()) { + inputs[first_input_index_not_null]->IsNullConstant()) { first_input_index_not_null++; } if (first_input_index_not_null == inputs.size()) { diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 4a6b835e80..9d99668484 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -753,7 +753,7 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { if (defined_by != nullptr && !current->IsSplit()) { LocationSummary* locations = defined_by->GetLocations(); if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) { - auto&& inputs = defined_by->GetInputs(); + HInputsRef inputs = defined_by->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { // Take the last interval of the input. It is the location of that interval // that will be used at `defined_by`. 
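A change that recurs throughout this patch replaces auto&& with the named aliases HInputsRef and HConstInputsRef for the result of GetInputs(); more of these conversions follow in the ssa_builder and ssa_liveness hunks below. Routing the transformation through the named HInputExtractor functor is what makes the view type nameable at all, since a lambda's type cannot be spelled out in a header. A minimal sketch of such a transforming view, assuming C++14 and simplified relative to ART's TransformArrayRef:

#include <cstddef>

// A non-owning view over Stored elements that applies Extractor on access.
// Because Extractor is a named type rather than a lambda, the full
// instantiation can be given an alias, as nodes.h does for HInputsRef.
template <typename Stored, typename Extractor>
class TransformView {
 public:
  TransformView(Stored* data, size_t size) : data_(data), size_(size) {}
  size_t size() const { return size_; }
  bool empty() const { return size_ == 0u; }
  auto operator[](size_t i) const { return Extractor()(data_[i]); }

 private:
  Stored* data_;  // not owned; must outlive the view
  size_t size_;
};

Usage then mirrors the loops in these hunks: index-based iteration over a view that stays valid only as long as the underlying input records do.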
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index ed50c69b5d..5a574d9af7 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -181,7 +181,7 @@ bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist) { return true; } else { DCHECK(common_type == Primitive::kPrimNot || Primitive::IsFloatingPointType(common_type)); - auto&& inputs = phi->GetInputs(); + HInputsRef inputs = phi->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { HInstruction* input = inputs[i]; if (input->GetType() != common_type) { @@ -617,7 +617,7 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive: || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) || (next->GetType() != type)) { ArenaAllocator* allocator = graph_->GetArena(); - auto&& inputs = phi->GetInputs(); + HInputsRef inputs = phi->GetInputs(); HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), inputs.size(), type); // Copy the inputs. Note that the graph may not be correctly typed diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 212d93532c..7af4302884 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -177,7 +177,7 @@ void SsaLivenessAnalysis::ComputeLiveness() { static void RecursivelyProcessInputs(HInstruction* current, HInstruction* actual_user, BitVector* live_in) { - auto&& inputs = current->GetInputs(); + HInputsRef inputs = current->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { HInstruction* input = inputs[i]; bool has_in_location = current->GetLocations()->InAt(i).IsValid(); @@ -431,7 +431,7 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until, // If the instruction dies at the phi assignment, we can try having the // same register. if (end == user->GetBlock()->GetPredecessors()[input_index]->GetLifetimeEnd()) { - auto&& inputs = user->GetInputs(); + HInputsRef inputs = user->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { if (i == input_index) { continue; } @@ -472,7 +472,7 @@ int LiveInterval::FindHintAtDefinition() const { if (defined_by_->IsPhi()) { // Try to use the same register as one of the inputs. const ArenaVector<HBasicBlock*>& predecessors = defined_by_->GetBlock()->GetPredecessors(); - auto&& inputs = defined_by_->GetInputs(); + HInputsRef inputs = defined_by_->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { size_t end = predecessors[i]->GetLifetimeEnd(); LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(end - 1);
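The ssa_liveness_analysis.cc hunks above change only the container type used to walk instruction inputs; for orientation, the computation they feed is the standard backward liveness dataflow, in which each input marked by RecursivelyProcessInputs contributes to a block's use set. A generic sketch in textbook form, not ART's exact implementation:

#include <bitset>

// Classic equations: live_in(B) = use(B) | (live_out(B) & ~def(B)), with
// live_out(B) the union of live_in over B's successors, iterated to a
// fixed point over the control-flow graph.
constexpr size_t kMaxSsaValues = 256;  // arbitrary bound for the sketch
using LiveSet = std::bitset<kMaxSsaValues>;

LiveSet ComputeLiveIn(const LiveSet& use, const LiveSet& def,
                      const LiveSet& live_out) {
  return use | (live_out & ~def);
}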